@exaudeus/workrail 3.18.1 → 3.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@
4
4
  <meta charset="UTF-8" />
5
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
6
  <title>WorkRail Console</title>
7
- <script type="module" crossorigin src="/console/assets/index-DMaX2-CW.js"></script>
7
+ <script type="module" crossorigin src="/console/assets/index-QhCFuxQV.js"></script>
8
8
  <link rel="stylesheet" crossorigin href="/console/assets/index-ibLhWBmX.css">
9
9
  </head>
10
10
  <body>
@@ -369,16 +369,16 @@
369
369
  "sha256": "5fe866e54f796975dec5d8ba9983aefd86074db212d3fccd64eed04bc9f0b3da",
370
370
  "bytes": 8011
371
371
  },
372
- "console/assets/index-DMaX2-CW.js": {
373
- "sha256": "9f9fe84d6eac1497afc2564f1b975a3494514c9b7b36b4a32c5473b831e56fbf",
374
- "bytes": 712124
372
+ "console/assets/index-QhCFuxQV.js": {
373
+ "sha256": "0096a86267fdf969abc3e3976ca44b3d3447271c7394345c8d6c27c084e6412c",
374
+ "bytes": 719937
375
375
  },
376
376
  "console/assets/index-ibLhWBmX.css": {
377
377
  "sha256": "346e55635d4d3dc2836dae83edb8563872721bf4b0e7e1ecf47fb9603424c206",
378
378
  "bytes": 58826
379
379
  },
380
380
  "console/index.html": {
381
- "sha256": "c708c996c5682570d44110dbef54b7b2a367bfa3bd99e36b358e94dda74302a4",
381
+ "sha256": "4232baa5128b860125a649d8e135da858f07290bbb9da04861945649b53ac85d",
382
382
  "bytes": 417
383
383
  },
384
384
  "core/error-handler.d.ts": {
@@ -1026,8 +1026,8 @@
1026
1026
  "bytes": 960
1027
1027
  },
1028
1028
  "mcp/server.js": {
1029
- "sha256": "f8414b17dcbf8f48eefd69f932785ce3e3a58a50609eb2309300c018ac6e1418",
1030
- "bytes": 15106
1029
+ "sha256": "a54d39ce8d7282abe16d140abbe1011e67cd3f9d9353f40727c44c1c212a90c2",
1030
+ "bytes": 16152
1031
1031
  },
1032
1032
  "mcp/step-content-envelope.d.ts": {
1033
1033
  "sha256": "19bd63c4d4de1d5d93393d346625d28ffd1bebdc320d4ba4e694cb740ec97d3b",
@@ -1038,12 +1038,12 @@
1038
1038
  "bytes": 483
1039
1039
  },
1040
1040
  "mcp/tool-call-timing.d.ts": {
1041
- "sha256": "b16ef00e70dcb03f3117d3489bef93d1da6c86a6b15678b5d21076316f281a33",
1042
- "bytes": 1116
1041
+ "sha256": "d775153fb5bb7c7b2bc896b2f9a57dc48ee0d0949307778dcdfea293bf0e394b",
1042
+ "bytes": 1320
1043
1043
  },
1044
1044
  "mcp/tool-call-timing.js": {
1045
- "sha256": "2aa6997582d52abb2fdb2bb27a46ab6aca2b26657c2771b153a720e8c322196d",
1046
- "bytes": 2715
1045
+ "sha256": "b3ae45196ab4f9d089a77703efaaa17e7e012e7019b1a5cfee9b128e67119c0c",
1046
+ "bytes": 4776
1047
1047
  },
1048
1048
  "mcp/tool-description-provider.d.ts": {
1049
1049
  "sha256": "1d46abc3112e11b68e57197e846f5708293ec9b2281fa71a9124ee2aad71e41b",
@@ -1666,8 +1666,8 @@
1666
1666
  "bytes": 619
1667
1667
  },
1668
1668
  "v2/durable-core/domain/observation-builder.js": {
1669
- "sha256": "204aae8e096ae1b947812fa5edd3b1c91706460a302f401a9a95956d124fe893",
1670
- "bytes": 1830
1669
+ "sha256": "df833dd1e0663acd4daf99cf27dc599b2c5e4a9e9c60ef88c1b0932892f9c62a",
1670
+ "bytes": 1951
1671
1671
  },
1672
1672
  "v2/durable-core/domain/outputs.d.ts": {
1673
1673
  "sha256": "adc32e4b86c8036eac61096fe83371140c7de140db414227041a8854435f8f54",
@@ -2190,12 +2190,12 @@
2190
2190
  "bytes": 457
2191
2191
  },
2192
2192
  "v2/infra/local/data-dir/index.d.ts": {
2193
- "sha256": "082fce7c347f0e9fade727365c84010afce4cb1c10a56c31cd5598ba029a6f59",
2194
- "bytes": 993
2193
+ "sha256": "b51169bfbaabb5149f874dc7b545099bc67e3615d92ee1a11163b39f1d12e218",
2194
+ "bytes": 1016
2195
2195
  },
2196
2196
  "v2/infra/local/data-dir/index.js": {
2197
- "sha256": "79543d5c1797c34e171a848fd6c4bcef3c5f8aa76fbe58aa5ae68cfa9aca6d18",
2198
- "bytes": 3602
2197
+ "sha256": "9ed70045f305f8a2257cab5fd5c7c2b9d12b678d15a67dde177145d1524c0f6b",
2198
+ "bytes": 3671
2199
2199
  },
2200
2200
  "v2/infra/local/directory-listing/index.d.ts": {
2201
2201
  "sha256": "3139014cb738db3b0f10beca01a3a4a35b9ab8e72c8889b3bbff204fdbcb6b6c",
@@ -2366,8 +2366,8 @@
2366
2366
  "bytes": 77
2367
2367
  },
2368
2368
  "v2/ports/data-dir.port.d.ts": {
2369
- "sha256": "6f028592ad207b522e2b00be1d7252004595d48354a0a4d95a6be575c876ac59",
2370
- "bytes": 756
2369
+ "sha256": "6713270d6e894e6ecf3cd6f0583dd16edfea771827ea2693f6ea54b970e97eb2",
2370
+ "bytes": 779
2371
2371
  },
2372
2372
  "v2/ports/data-dir.port.js": {
2373
2373
  "sha256": "d43aa81f5bc89faa359e0f97c814ba25155591ff078fbb9bfd40f8c7c9683230",
@@ -2646,12 +2646,12 @@
2646
2646
  "bytes": 4795
2647
2647
  },
2648
2648
  "v2/usecases/console-routes.d.ts": {
2649
- "sha256": "13731496ddce8a46f1674d24a8660e790016dc77cd562106b95436e5a5513f72",
2650
- "bytes": 456
2649
+ "sha256": "1afd41269475fecd4a27ac1c5faccf1e369ec8bd8c9a9c36d38ed92c79e4ec51",
2650
+ "bytes": 508
2651
2651
  },
2652
2652
  "v2/usecases/console-routes.js": {
2653
- "sha256": "c9f3047a070722fc6142dedcb7b74b12581d969eba63af6b788ddb9e4ad59806",
2654
- "bytes": 12838
2653
+ "sha256": "315a10a46cacd8bf279ecdc72b2cfac912f6d6ab5f606419d16fd734c9406718",
2654
+ "bytes": 15503
2655
2655
  },
2656
2656
  "v2/usecases/console-service.d.ts": {
2657
2657
  "sha256": "489a44091dc1b15ff938a60f9cf3ac28363614b9f0446286f0914157528c7eb3",
@@ -35,6 +35,8 @@ var __importStar = (this && this.__importStar) || (function () {
35
35
  Object.defineProperty(exports, "__esModule", { value: true });
36
36
  exports.createToolContext = createToolContext;
37
37
  exports.composeServer = composeServer;
38
+ const fs = __importStar(require("fs"));
39
+ const path = __importStar(require("path"));
38
40
  const zod_to_json_schema_js_1 = require("./zod-to-json-schema.js");
39
41
  const container_js_1 = require("../di/container.js");
40
42
  const tokens_js_1 = require("../di/tokens.js");
@@ -164,6 +166,18 @@ async function composeServer() {
164
166
  await (0, container_js_1.bootstrap)();
165
167
  const ctx = await createToolContext();
166
168
  const timingRingBuffer = new tool_call_timing_js_1.ToolCallTimingRingBuffer(tool_call_timing_js_1.DEFAULT_RING_BUFFER_CAPACITY);
169
+ let serverVersion = 'unknown';
170
+ try {
171
+ const pkgPath = path.resolve(__dirname, '../../package.json');
172
+ const pkgJson = JSON.parse(fs.readFileSync(pkgPath, 'utf8'));
173
+ if (pkgJson.version)
174
+ serverVersion = pkgJson.version;
175
+ }
176
+ catch {
177
+ }
178
+ const toolCallsPerfFile = ctx.v2?.dataDir
179
+ ? path.join(ctx.v2.dataDir.perfDir(), 'tool-calls.jsonl')
180
+ : null;
167
181
  if (ctx.v2 && ctx.httpServer && ctx.v2.dataDir && ctx.v2.directoryListing) {
168
182
  const { ConsoleService } = await Promise.resolve().then(() => __importStar(require('../v2/usecases/console-service.js')));
169
183
  const { mountConsoleRoutes } = await Promise.resolve().then(() => __importStar(require('../v2/usecases/console-routes.js')));
@@ -174,7 +188,7 @@ async function composeServer() {
174
188
  snapshotStore: ctx.v2.snapshotStore,
175
189
  pinnedWorkflowStore: ctx.v2.pinnedStore,
176
190
  });
177
- ctx.httpServer.mountRoutes((app) => mountConsoleRoutes(app, consoleService, ctx.workflowService, timingRingBuffer));
191
+ ctx.httpServer.mountRoutes((app) => mountConsoleRoutes(app, consoleService, ctx.workflowService, timingRingBuffer, toolCallsPerfFile ?? undefined, serverVersion));
178
192
  console.error('[Console] v2 Console API routes mounted at /api/v2/');
179
193
  }
180
194
  ctx.httpServer?.finalize();
@@ -218,9 +232,16 @@ async function composeServer() {
218
232
  tools,
219
233
  }));
220
234
  const devMode = (0, dev_mode_js_1.isDevMode)();
221
- const timingSink = devMode
222
- ? (0, tool_call_timing_js_1.composeSinks)((0, tool_call_timing_js_1.createRingBufferSink)(timingRingBuffer), (0, tool_call_timing_js_1.createDevPerfSink)())
223
- : (0, tool_call_timing_js_1.createRingBufferSink)(timingRingBuffer);
235
+ const diskSink = toolCallsPerfFile
236
+ ? (0, tool_call_timing_js_1.createDiskPersistSink)(toolCallsPerfFile, serverVersion)
237
+ : null;
238
+ const timingSink = diskSink
239
+ ? devMode
240
+ ? (0, tool_call_timing_js_1.composeSinks)((0, tool_call_timing_js_1.createRingBufferSink)(timingRingBuffer), diskSink, (0, tool_call_timing_js_1.createDevPerfSink)())
241
+ : (0, tool_call_timing_js_1.composeSinks)((0, tool_call_timing_js_1.createRingBufferSink)(timingRingBuffer), diskSink)
242
+ : devMode
243
+ ? (0, tool_call_timing_js_1.composeSinks)((0, tool_call_timing_js_1.createRingBufferSink)(timingRingBuffer), (0, tool_call_timing_js_1.createDevPerfSink)())
244
+ : (0, tool_call_timing_js_1.createRingBufferSink)(timingRingBuffer);
224
245
  if (devMode) {
225
246
  console.error('[PerfTrace] WORKRAIL_DEV=1 -- tool call timing active');
226
247
  }
@@ -1,4 +1,7 @@
1
1
  export type ToolCallOutcome = 'success' | 'error' | 'unknown_tool';
2
+ export type ToolCallTimingEntry = ToolCallTiming & {
3
+ readonly serverVersion: string;
4
+ };
2
5
  export interface ToolCallTiming {
3
6
  readonly toolName: string;
4
7
  readonly startedAtMs: number;
@@ -19,6 +22,7 @@ export declare class ToolCallTimingRingBuffer {
19
22
  }
20
23
  export declare const DEFAULT_RING_BUFFER_CAPACITY = 100;
21
24
  export declare function createRingBufferSink(buffer: ToolCallTimingRingBuffer): ToolCallTimingSink;
25
+ export declare function createDiskPersistSink(perfFilePath: string, serverVersion: string): ToolCallTimingSink;
22
26
  export declare function createDevPerfSink(): ToolCallTimingSink;
23
27
  export declare function composeSinks(...sinks: ToolCallTimingSink[]): ToolCallTimingSink;
24
28
  export declare function withToolCallTiming<T>(toolName: string, handler: () => Promise<T>, sink: ToolCallTimingSink): Promise<T>;
@@ -1,10 +1,46 @@
1
1
  "use strict";
2
// NOTE(review): the three helpers below look like compiler-emitted CommonJS interop
// shims -- confirm against the build before hand-editing them.
// __createBinding(o, m, k, k2): exposes property `k` of `m` on `o` under the name `k2`
// (defaults to `k`). An already-defined `this.__createBinding` is reused. When
// Object.create exists, the property is defined from m's own descriptor; that descriptor
// is replaced by an enumerable getter reading m[k] when it is missing, when it is an
// accessor on a module without `__esModule`, or when it is a writable/configurable data
// property. Without Object.create the value is copied by plain assignment.
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
// __setModuleDefault(o, v): stores `v` as the enumerable "default" property of `o`
// (via Object.defineProperty when Object.create exists, otherwise by assignment).
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
// __importStar(mod): returns `mod` unchanged when `mod.__esModule` is truthy. Otherwise
// builds a fresh object that re-exposes every own property name of `mod` except
// "default" through __createBinding, and sets `mod` itself as its "default".
// `ownKeys` replaces itself on first call with Object.getOwnPropertyNames, or with a
// for-in/hasOwnProperty fallback when that function is unavailable.
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
2
35
  Object.defineProperty(exports, "__esModule", { value: true });
3
36
  exports.DEFAULT_RING_BUFFER_CAPACITY = exports.ToolCallTimingRingBuffer = exports.noopToolCallTimingSink = void 0;
4
37
  exports.createRingBufferSink = createRingBufferSink;
38
+ exports.createDiskPersistSink = createDiskPersistSink;
5
39
  exports.createDevPerfSink = createDevPerfSink;
6
40
  exports.composeSinks = composeSinks;
7
41
  exports.withToolCallTiming = withToolCallTiming;
42
+ const fs = __importStar(require("fs"));
43
+ const path = __importStar(require("path"));
8
44
  const noopToolCallTimingSink = () => { };
9
45
  exports.noopToolCallTimingSink = noopToolCallTimingSink;
10
46
  class ToolCallTimingRingBuffer {
@@ -44,6 +80,22 @@ function createRingBufferSink(buffer) {
44
80
  buffer.push(timing);
45
81
  };
46
82
  }
83
/**
 * Create a timing sink that appends each tool-call timing to a JSONL file.
 *
 * Every record is the timing object extended with `serverVersion`, serialized
 * as one JSON document followed by a newline. The append is fire-and-forget:
 * the sink returns immediately and never throws because of a failed write.
 *
 * @param {string} perfFilePath - File the JSON lines are appended to.
 * @param {string} serverVersion - Version string stamped onto every record.
 * @returns {(timing: object) => void} Sink to hand to the timing pipeline.
 */
function createDiskPersistSink(perfFilePath, serverVersion) {
    try {
        const parentDir = path.dirname(perfFilePath);
        fs.mkdirSync(parentDir, { recursive: true });
    }
    catch {
        // Best effort only: if the directory cannot be created, the first
        // failed append below reports the problem.
    }
    // Only the first write failure is reported, so stderr is not flooded.
    let warnedOnce = false;
    const reportFailure = (err) => {
        if (warnedOnce) {
            return;
        }
        warnedOnce = true;
        process.stderr.write(`[PerfTrace] Failed to write timing to disk: ${String(err)}\n`);
    };
    return (timing) => {
        const record = { ...timing, serverVersion };
        const line = JSON.stringify(record) + '\n';
        fs.promises.appendFile(perfFilePath, line).catch(reportFailure);
    };
}
47
99
  function createDevPerfSink() {
48
100
  return (timing) => {
49
101
  const outcomeLabel = timing.outcome === 'success' ? 'OK' : timing.outcome.toUpperCase();
@@ -34,6 +34,8 @@ function anchorsToObservations(anchors) {
34
34
  });
35
35
  break;
36
36
  case 'repo_root':
37
+ if (anchor.value.length > constants_js_1.MAX_OBSERVATION_SHORT_STRING_LENGTH)
38
+ break;
37
39
  observations.push({
38
40
  key: 'repo_root',
39
41
  value: { type: 'short_string', value: anchor.value },
@@ -21,4 +21,5 @@ export declare class LocalDataDirV2 implements DataDirPortV2 {
21
21
  tokenIndexPath(): string;
22
22
  managedSourcesPath(): string;
23
23
  managedSourcesLockPath(): string;
24
+ perfDir(): string;
24
25
  }
@@ -95,5 +95,8 @@ class LocalDataDirV2 {
95
95
  managedSourcesLockPath() {
96
96
  return path.join(this.root(), 'managed-sources', 'managed-sources.lock');
97
97
  }
98
+ perfDir() {
99
+ return path.join(this.root(), 'perf');
100
+ }
98
101
  }
99
102
  exports.LocalDataDirV2 = LocalDataDirV2;
@@ -16,4 +16,5 @@ export interface DataDirPortV2 {
16
16
  tokenIndexPath(): string;
17
17
  managedSourcesPath(): string;
18
18
  managedSourcesLockPath(): string;
19
+ perfDir(): string;
19
20
  }
@@ -2,4 +2,4 @@ import type { Application } from 'express';
2
2
  import type { ConsoleService } from './console-service.js';
3
3
  import type { WorkflowService } from '../../application/services/workflow-service.js';
4
4
  import type { ToolCallTimingRingBuffer } from '../../mcp/tool-call-timing.js';
5
- export declare function mountConsoleRoutes(app: Application, consoleService: ConsoleService, workflowService?: WorkflowService, timingRingBuffer?: ToolCallTimingRingBuffer): () => void;
5
+ export declare function mountConsoleRoutes(app: Application, consoleService: ConsoleService, workflowService?: WorkflowService, timingRingBuffer?: ToolCallTimingRingBuffer, toolCallsPerfFile?: string, serverVersion?: string): () => void;
@@ -53,7 +53,7 @@ function loadWorkflowTags() {
53
53
  return { version: 0, tags: [], workflows: {} };
54
54
  }
55
55
  }
56
- function mountConsoleRoutes(app, consoleService, workflowService, timingRingBuffer) {
56
+ function mountConsoleRoutes(app, consoleService, workflowService, timingRingBuffer, toolCallsPerfFile, serverVersion) {
57
57
  const sseClients = new Set();
58
58
  let sseDebounceTimer = null;
59
59
  function broadcastChange() {
@@ -99,14 +99,71 @@ function mountConsoleRoutes(app, consoleService, workflowService, timingRingBuff
99
99
  req.on('close', () => { sseClients.delete(res); });
100
100
  res.on('close', () => { sseClients.delete(res); });
101
101
  });
102
// Persisted entries whose `startedAtMs` is older than this window are ignored.
const THIRTY_DAYS_MS = 30 * 24 * 60 * 60 * 1000;
// Upper bound on bytes read from the perf file per call; larger files are
// read from their tail only.
const PERF_FILE_READ_LIMIT_BYTES = 5 * 1024 * 1024;
/**
 * Read persisted tool-call timing entries from a JSONL file.
 *
 * Files larger than PERF_FILE_READ_LIMIT_BYTES are read from the tail only, so
 * the first line of that window is usually a fragment; like every other
 * unparsable line it is skipped. Lines are also skipped when they lack a
 * string `toolName`, numeric `startedAtMs` / `durationMs`, or a known
 * `outcome`, or when they started more than THIRTY_DAYS_MS ago. Entries
 * without a string `serverVersion` are returned with `serverVersion: 'unknown'`.
 *
 * @param {string} perfFile - Path of the JSONL file to read.
 * @returns {Promise<object[]>} Valid, recent entries in file order; an empty
 *   array when the file cannot be stat'ed, opened or read.
 */
async function readDiskEntries(perfFile) {
    let raw;
    try {
        const stat = await fs_1.default.promises.stat(perfFile);
        if (stat.size > PERF_FILE_READ_LIMIT_BYTES) {
            const fd = await fs_1.default.promises.open(perfFile, 'r');
            try {
                const offset = stat.size - PERF_FILE_READ_LIMIT_BYTES;
                const buf = Buffer.alloc(PERF_FILE_READ_LIMIT_BYTES);
                const { bytesRead } = await fd.read(buf, 0, PERF_FILE_READ_LIMIT_BYTES, offset);
                // Decode only what was actually read: if the file shrank after
                // stat(), the rest of the buffer is zero padding that would
                // corrupt the last line.
                raw = buf.toString('utf8', 0, bytesRead);
            }
            finally {
                // Always release the handle, including when read() rejects.
                await fd.close();
            }
        }
        else {
            raw = await fs_1.default.promises.readFile(perfFile, 'utf8');
        }
    }
    catch {
        // The perf file is optional diagnostics; a missing or unreadable file
        // simply yields no entries.
        return [];
    }
    const cutoff = Date.now() - THIRTY_DAYS_MS;
    return raw
        .split('\n')
        .filter(Boolean)
        .flatMap((line) => {
            try {
                const entry = JSON.parse(line);
                if (typeof entry.toolName !== 'string' ||
                    typeof entry.startedAtMs !== 'number' ||
                    typeof entry.durationMs !== 'number' ||
                    (entry.outcome !== 'success' && entry.outcome !== 'error' && entry.outcome !== 'unknown_tool'))
                    return [];
                const safeEntry = typeof entry.serverVersion === 'string'
                    ? entry
                    : { ...entry, serverVersion: 'unknown' };
                if (safeEntry.startedAtMs < cutoff)
                    return [];
                return [safeEntry];
            }
            catch {
                // Malformed JSON, or a non-object such as `null`: skip the line.
                return [];
            }
        });
}
102
147
  const devMode = (0, dev_mode_js_1.isDevMode)();
103
148
  if (devMode) {
104
- app.get('/api/v2/perf/tool-calls', (req, res) => {
149
+ app.get('/api/v2/perf/tool-calls', async (req, res) => {
105
150
  const rawLimit = req.query['limit'];
106
151
  const limit = typeof rawLimit === 'string' ? parseInt(rawLimit, 10) : undefined;
107
152
  const safeLimit = (limit !== undefined && Number.isFinite(limit) && limit > 0) ? limit : undefined;
108
- const observations = timingRingBuffer ? timingRingBuffer.recent(safeLimit) : [];
109
- res.json({ success: true, data: { observations, total: timingRingBuffer?.size ?? 0, devMode } });
153
+ const diskEntries = toolCallsPerfFile ? await readDiskEntries(toolCallsPerfFile) : [];
154
+ const ringEntries = timingRingBuffer ? timingRingBuffer.recent(safeLimit) : [];
155
+ const version = serverVersion ?? 'unknown';
156
+ const ringEntriesWithVersion = ringEntries.map((t) => ({
157
+ ...t,
158
+ serverVersion: version,
159
+ }));
160
+ const dedupeKey = (e) => `${e.toolName}:${e.startedAtMs}:${e.durationMs}`;
161
+ const inMemoryKeys = new Set(ringEntriesWithVersion.map(dedupeKey));
162
+ const diskOnlyEntries = diskEntries.filter((e) => !inMemoryKeys.has(dedupeKey(e)));
163
+ const allEntries = [...ringEntriesWithVersion, ...diskOnlyEntries]
164
+ .sort((a, b) => b.startedAtMs - a.startedAtMs)
165
+ .slice(0, safeLimit ?? undefined);
166
+ res.json({ success: true, data: { observations: allEntries, devMode } });
110
167
  });
111
168
  }
112
169
  app.get('/api/v2/sessions', async (_req, res) => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@exaudeus/workrail",
3
- "version": "3.18.1",
3
+ "version": "3.19.0",
4
4
  "description": "Step-by-step workflow enforcement for AI agents via MCP",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -170,11 +170,12 @@
170
170
  "State what result the authored workflow should reliably produce for its user.",
171
171
  "List the criteria that would make the workflow feel genuinely satisfying in practice.",
172
172
  "Name the biggest likely failure mode and the most dangerous false-confidence mode.",
173
- "State what would make the workflow technically correct but still disappointing."
173
+ "State what would make the workflow technically correct but still disappointing.",
174
+ "Build a success-criteria-to-mechanism map: for each item in userSatisfactionCriteria, name the specific step(s) that enforce it and the enforcement mechanism (gate, self-audit, second pass, example contrast, rubric). Any criterion with no named mechanism is a gap -- either add a mechanism to the planned architecture or flag it as a known weakness to address in Phase 3."
174
175
  ],
175
176
  "outputRequired": {
176
- "notesMarkdown": "Effectiveness target, satisfaction criteria, failure modes, and false-confidence risks.",
177
- "context": "Capture effectivenessTarget, userSatisfactionCriteria, primaryFailureMode, dangerousFalseConfidenceModes, likelyWeakOutcomeModes, and trustRisk."
177
+ "notesMarkdown": "Effectiveness target, satisfaction criteria, failure modes, false-confidence risks, and the success-criteria-to-mechanism map with any gaps identified.",
178
+ "context": "Capture effectivenessTarget, userSatisfactionCriteria, primaryFailureMode, dangerousFalseConfidenceModes, likelyWeakOutcomeModes, trustRisk, and successCriteriaToMechanismMap."
178
179
  },
179
180
  "verify": [
180
181
  "The authored workflow now has a clear outcome bar, not just an authoring bar."
@@ -492,11 +493,12 @@
492
493
  "Reviewer-family or validator output is evidence, not authority."
493
494
  ],
494
495
  "procedure": [
495
- "Score these dimensions 0-2 with one sentence of evidence each: `voiceClarity`, `ceremonyLevel`, `loopSoundness`, `delegationBoundedness`, `artifactClarity`, `taskEffectiveness`, `falseConfidenceResistance`, `stateMinimality`, `coverageSharpness`, `domainFit`, `handoffUtility`, `rigorAdaptability` (0 = adapts to complexity/rigor levels, 2 = single-weight), `enforcementStrength` (0 = behavioral rules have structural teeth; 2 = important rules are prose-only with no enforcement mechanism), and `modernizationDiscipline` (0 = every valueInventory item preserved, equivalently replaced with justification, or dropped with justification; 2 = items missing or replaced with weaker versions without justification -- score 0 for create mode).",
496
+ "Score these dimensions 0-2 with one sentence of evidence each: `voiceClarity`, `ceremonyLevel`, `loopSoundness`, `delegationBoundedness`, `artifactClarity`, `taskEffectiveness`, `falseConfidenceResistance`, `stateMinimality`, `coverageSharpness`, `domainFit`, `handoffUtility`, `rigorAdaptability` (0 = adapts to complexity/rigor levels, 2 = single-weight), `enforcementStrength` (0 = behavioral rules have structural teeth; 2 = important rules are prose-only with no enforcement mechanism), `outputQualityMechanisms` (0 = each key generative step has a concrete enforcement mechanism for output quality such as a self-audit, second pass, example contrast, or rubric; 1 = some generative steps rely only on metaGuidance or prose rules; 2 = generative steps have no quality enforcement beyond prompt wording), and `modernizationDiscipline` (0 = every valueInventory item preserved, equivalently replaced with justification, or dropped with justification; 2 = items missing or replaced with weaker versions without justification -- score 0 for create mode).",
497
+ "For each generative step (any step whose output is judged on content quality rather than structural correctness), run an adversarial trace: what does a lazy or average agent do here, and does the prompt prevent it? A step where the lazy path produces a plausible-looking but shallow result scores poorly on `outputQualityMechanisms`. Check each such step against the successCriteriaToMechanismMap from Phase 1 -- any criterion whose mechanism lives in this step must actually be enforced here.",
496
498
  "If delegation is available and rigor is THOROUGH, run an adversarial review bundle with these lenses: `engine_native_reviewer`, `task_effectiveness_reviewer`, `state_economy_reviewer`, `false_confidence_reviewer`, `domain_fit_reviewer`, and `maintainer_reviewer`.",
497
499
  "Synthesize what the review confirmed, what it challenged, and what changed your mind.",
498
500
  "When scoring `falseConfidenceResistance`, explicitly check: do the workflow's quality gates protect edge cases and degraded paths, or only the happy path? A workflow that passes its own checks on ideal input but fails silently on minimal or unexpected input scores 2.",
499
- "Set hard-gate failures whenever any of these are materially weak: `taskEffectiveness`, `falseConfidenceResistance`, `stateMinimality`, `coverageSharpness`, `domainFit`, or `handoffUtility`.",
501
+ "Set hard-gate failures whenever any of these are materially weak: `taskEffectiveness`, `falseConfidenceResistance`, `stateMinimality`, `coverageSharpness`, `domainFit`, `handoffUtility`, or `outputQualityMechanisms`.",
500
502
  "Set `authoringIntegrityPassed = true` only if structural and authoring-quality dimensions are all acceptable. Set `outcomeEffectivenessPassed = true` only if the workflow is likely to achieve satisfying results for the user."
501
503
  ],
502
504
  "outputRequired": {
@@ -548,6 +550,7 @@
548
550
  { "type": "contains", "value": "handoffUtility", "message": "Review must score handoffUtility" },
549
551
  { "type": "contains", "value": "rigorAdaptability", "message": "Review must score rigorAdaptability" },
550
552
  { "type": "contains", "value": "enforcementStrength", "message": "Review must score enforcementStrength" },
553
+ { "type": "contains", "value": "outputQualityMechanisms", "message": "Review must score outputQualityMechanisms" },
551
554
  {
552
555
  "type": "contains",
553
556
  "value": "modernizationDiscipline",
@@ -665,6 +668,31 @@
665
668
  },
666
669
  "notesOptional": true,
667
670
  "requireConfirmation": false
671
+ },
672
+ {
673
+ "id": "phase-8-process-retrospective",
674
+ "title": "Phase 8: Process Retrospective",
675
+ "promptBlocks": {
676
+ "goal": "While this run is still fresh, identify gaps in the workflow-for-workflows process itself -- not in the authored workflow.",
677
+ "constraints": [
678
+ "This step is about the authoring process, not the authored workflow. Do not summarize the workflow again.",
679
+ "Be honest. If the process caught everything, say so. If it missed something, say specifically where and why."
680
+ ],
681
+ "procedure": [
682
+ "Look back at the full run. Were any weaknesses in the authored workflow only identified post-hoc -- after the quality gate loop, by the user, or by a later reviewer?",
683
+ "For each weakness identified late: name the Phase where it should have been caught, explain why the current step failed to surface it, and propose a specific change to that step's procedure or scoring rubric that would catch it on the next run.",
684
+ "Check the successCriteriaToMechanismMap from Phase 1 against the final workflow: did every criterion end up with a concrete enforcement mechanism? Any criterion that is still prose-only in the final file is a gap in the Phase 1 or Phase 3 process.",
685
+ "If nothing was missed and the process caught all meaningful issues in-band, say so explicitly and explain what worked."
686
+ ],
687
+ "outputRequired": {
688
+ "notesMarkdown": "Process gaps found (or confirmed absent), where they should have been caught, and concrete proposed changes to workflow-for-workflows. This output is raw material for improving this workflow.",
689
+ "context": "Capture processGaps and suggestedProcessImprovements."
690
+ },
691
+ "verify": [
692
+ "The retrospective identifies specific step-level changes, not vague general improvements."
693
+ ]
694
+ },
695
+ "requireConfirmation": false
668
696
  }
669
697
  ],
670
698
  "validatedAgainstSpecVersion": 3