@exaudeus/workrail 3.0.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,5 +9,10 @@ export type ResolveTemplatesPassError = {
9
9
  readonly code: 'TEMPLATE_EXPAND_ERROR';
10
10
  readonly stepId: string;
11
11
  readonly cause: TemplateExpandError;
12
+ } | {
13
+ readonly code: 'DUPLICATE_STEP_ID';
14
+ readonly stepId: string;
15
+ readonly templateId: string;
16
+ readonly message: string;
12
17
  };
13
18
  export declare function resolveTemplatesPass(steps: readonly (WorkflowStepDefinition | LoopStepDefinition)[], registry: TemplateRegistry): Result<readonly (WorkflowStepDefinition | LoopStepDefinition)[], ResolveTemplatesPassError>;
@@ -23,10 +23,17 @@ function resolveStepTemplate(step, registry) {
23
23
  cause: expandResult.error,
24
24
  });
25
25
  }
26
+ if (step.runCondition) {
27
+ return (0, neverthrow_1.ok)(expandResult.value.map(expanded => ({
28
+ ...expanded,
29
+ runCondition: expanded.runCondition ?? step.runCondition,
30
+ })));
31
+ }
26
32
  return (0, neverthrow_1.ok)(expandResult.value);
27
33
  }
28
34
  function resolveTemplatesPass(steps, registry) {
29
35
  const resolved = [];
36
+ const seenIds = new Set();
30
37
  for (const step of steps) {
31
38
  if ((0, workflow_definition_js_1.isLoopStepDefinition)(step)) {
32
39
  if (Array.isArray(step.body)) {
@@ -35,11 +42,22 @@ function resolveTemplatesPass(steps, registry) {
35
42
  const res = resolveStepTemplate(bodyStep, registry);
36
43
  if (res.isErr())
37
44
  return (0, neverthrow_1.err)(res.error);
45
+ for (const expanded of res.value) {
46
+ const collision = checkIdCollision(expanded.id, bodyStep.templateCall?.templateId, seenIds);
47
+ if (collision)
48
+ return (0, neverthrow_1.err)(collision);
49
+ }
38
50
  bodyResolved.push(...res.value);
39
51
  }
52
+ const loopCollision = checkIdCollision(step.id, undefined, seenIds);
53
+ if (loopCollision)
54
+ return (0, neverthrow_1.err)(loopCollision);
40
55
  resolved.push({ ...step, body: bodyResolved });
41
56
  }
42
57
  else {
58
+ const loopCollision = checkIdCollision(step.id, undefined, seenIds);
59
+ if (loopCollision)
60
+ return (0, neverthrow_1.err)(loopCollision);
43
61
  resolved.push(step);
44
62
  }
45
63
  }
@@ -47,8 +65,25 @@ function resolveTemplatesPass(steps, registry) {
47
65
  const res = resolveStepTemplate(step, registry);
48
66
  if (res.isErr())
49
67
  return (0, neverthrow_1.err)(res.error);
68
+ for (const expanded of res.value) {
69
+ const collision = checkIdCollision(expanded.id, step.templateCall?.templateId, seenIds);
70
+ if (collision)
71
+ return (0, neverthrow_1.err)(collision);
72
+ }
50
73
  resolved.push(...res.value);
51
74
  }
52
75
  }
53
76
  return (0, neverthrow_1.ok)(resolved);
54
77
  }
78
+ function checkIdCollision(stepId, templateId, seenIds) {
79
+ if (seenIds.has(stepId)) {
80
+ return {
81
+ code: 'DUPLICATE_STEP_ID',
82
+ stepId,
83
+ templateId: templateId ?? '(none)',
84
+ message: `Duplicate step id '${stepId}' after template expansion${templateId ? ` (from template '${templateId}')` : ''}`,
85
+ };
86
+ }
87
+ seenIds.add(stepId);
88
+ return undefined;
89
+ }
@@ -0,0 +1,11 @@
1
+ import type { Result } from 'neverthrow';
2
+ import type { WorkflowDefinition } from '../../../types/workflow-definition.js';
3
+ export interface RoutineLoadWarning {
4
+ readonly file: string;
5
+ readonly reason: string;
6
+ }
7
+ export interface RoutineLoadResult {
8
+ readonly routines: ReadonlyMap<string, WorkflowDefinition>;
9
+ readonly warnings: readonly RoutineLoadWarning[];
10
+ }
11
+ export declare function loadRoutineDefinitions(routinesDir?: string): Result<RoutineLoadResult, string>;
@@ -0,0 +1,45 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.loadRoutineDefinitions = loadRoutineDefinitions;
7
+ const fs_1 = require("fs");
8
+ const path_1 = __importDefault(require("path"));
9
+ const neverthrow_1 = require("neverthrow");
10
+ const workflow_definition_js_1 = require("../../../types/workflow-definition.js");
11
+ function resolveRoutinesDir() {
12
+ const projectRoot = path_1.default.resolve(__dirname, '..', '..', '..', '..');
13
+ return path_1.default.join(projectRoot, 'workflows', 'routines');
14
+ }
15
+ function loadRoutineDefinitions(routinesDir) {
16
+ const dir = routinesDir ?? resolveRoutinesDir();
17
+ if (!(0, fs_1.existsSync)(dir)) {
18
+ return (0, neverthrow_1.ok)({ routines: new Map(), warnings: [] });
19
+ }
20
+ let files;
21
+ try {
22
+ files = (0, fs_1.readdirSync)(dir).filter(f => f.endsWith('.json'));
23
+ }
24
+ catch (e) {
25
+ return (0, neverthrow_1.err)(`Failed to read routines directory '${dir}': ${e}`);
26
+ }
27
+ const routines = new Map();
28
+ const warnings = [];
29
+ for (const file of files) {
30
+ const filePath = path_1.default.join(dir, file);
31
+ try {
32
+ const raw = (0, fs_1.readFileSync)(filePath, 'utf-8');
33
+ const parsed = JSON.parse(raw);
34
+ if (!(0, workflow_definition_js_1.hasWorkflowDefinitionShape)(parsed)) {
35
+ warnings.push({ file, reason: 'does not match WorkflowDefinition shape (missing id, name, description, version, or steps)' });
36
+ continue;
37
+ }
38
+ routines.set(parsed.id, parsed);
39
+ }
40
+ catch (e) {
41
+ warnings.push({ file, reason: String(e) });
42
+ }
43
+ }
44
+ return (0, neverthrow_1.ok)({ routines, warnings });
45
+ }
@@ -16,6 +16,6 @@ export interface TemplateRegistry {
16
16
  readonly has: (templateId: string) => boolean;
17
17
  readonly knownIds: () => readonly string[];
18
18
  }
19
- export declare function createRoutineExpander(routineId: string, definition: WorkflowDefinition): Result<TemplateExpander, TemplateExpandError>;
20
19
  export declare function routineIdToTemplateId(routineId: string): string;
21
- export declare function createTemplateRegistry(routineDefinitions?: ReadonlyMap<string, WorkflowDefinition>): TemplateRegistry;
20
+ export declare function createRoutineExpander(routineId: string, definition: WorkflowDefinition): Result<TemplateExpander, TemplateExpandError>;
21
+ export declare function createTemplateRegistry(routineExpanders?: ReadonlyMap<string, TemplateExpander>): TemplateRegistry;
@@ -1,78 +1,95 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.createRoutineExpander = createRoutineExpander;
4
3
  exports.routineIdToTemplateId = routineIdToTemplateId;
4
+ exports.createRoutineExpander = createRoutineExpander;
5
5
  exports.createTemplateRegistry = createTemplateRegistry;
6
6
  const neverthrow_1 = require("neverthrow");
7
7
  const SINGLE_BRACE_ARG = /(?<!\{)\{([^{}]+)\}(?!\})/g;
8
- function substituteArgs(text, args, routineId, stepId) {
8
+ function isSubstitutableValue(value) {
9
+ return typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean';
10
+ }
11
+ function substituteArgs(template, args, templateId, routineId, stepId) {
9
12
  const missing = [];
10
- const substituted = text.replace(SINGLE_BRACE_ARG, (match, argName) => {
11
- if (argName in args) {
12
- return String(args[argName]);
13
+ const badType = [];
14
+ const result = template.replace(SINGLE_BRACE_ARG, (match, argName) => {
15
+ if (!(argName in args)) {
16
+ missing.push(argName);
17
+ return match;
13
18
  }
14
- missing.push(argName);
15
- return match;
19
+ const value = args[argName];
20
+ if (!isSubstitutableValue(value)) {
21
+ badType.push(argName);
22
+ return match;
23
+ }
24
+ return String(value);
16
25
  });
17
26
  if (missing.length > 0) {
18
27
  return (0, neverthrow_1.err)({
19
28
  code: 'TEMPLATE_EXPAND_FAILED',
20
- templateId: `wr.templates.routine.${routineId}`,
29
+ templateId,
21
30
  message: `MISSING_TEMPLATE_ARG: routine '${routineId}' step '${stepId}' references arg(s) '${missing.join("', '")}' but they were not provided in templateCall.args`,
22
31
  });
23
32
  }
24
- return (0, neverthrow_1.ok)(substituted);
25
- }
26
- function validateNoRecursiveTemplateCall(routineId, steps) {
27
- for (const step of steps) {
28
- if (step.templateCall) {
29
- return (0, neverthrow_1.err)({
30
- code: 'TEMPLATE_EXPAND_FAILED',
31
- templateId: `wr.templates.routine.${routineId}`,
32
- message: `Routine '${routineId}' step '${step.id}' contains a templateCall. Recursive routine injection is not supported.`,
33
- });
34
- }
33
+ if (badType.length > 0) {
34
+ return (0, neverthrow_1.err)({
35
+ code: 'TEMPLATE_EXPAND_FAILED',
36
+ templateId,
37
+ message: `INVALID_TEMPLATE_ARG_TYPE: routine '${routineId}' step '${stepId}' arg(s) '${badType.join("', '")}' must be string, number, or boolean (got non-primitive)`,
38
+ });
35
39
  }
36
- return (0, neverthrow_1.ok)(undefined);
40
+ return (0, neverthrow_1.ok)(result);
41
+ }
42
+ function routineIdToTemplateId(routineId) {
43
+ const name = routineId.startsWith('routine-') ? routineId.slice('routine-'.length) : routineId;
44
+ return `wr.templates.routine.${name}`;
37
45
  }
38
46
  function createRoutineExpander(routineId, definition) {
39
- const routineSteps = definition.steps;
40
- const recursiveCheck = validateNoRecursiveTemplateCall(routineId, routineSteps);
41
- if (recursiveCheck.isErr())
42
- return (0, neverthrow_1.err)(recursiveCheck.error);
43
- for (const step of routineSteps) {
44
- if (!step.id || !step.title) {
47
+ for (const step of definition.steps) {
48
+ if ('templateCall' in step && step.templateCall) {
45
49
  return (0, neverthrow_1.err)({
46
50
  code: 'TEMPLATE_EXPAND_FAILED',
47
- templateId: `wr.templates.routine.${routineId}`,
48
- message: `Routine '${routineId}' step '${step.id ?? '(missing id)'}' is missing required field '${!step.id ? 'id' : 'title'}'.`,
49
- });
50
- }
51
- if (!step.prompt) {
52
- return (0, neverthrow_1.err)({
53
- code: 'TEMPLATE_EXPAND_FAILED',
54
- templateId: `wr.templates.routine.${routineId}`,
55
- message: `Routine '${routineId}' step '${step.id}' is missing required field 'prompt'.`,
51
+ templateId: routineIdToTemplateId(routineId),
52
+ message: `Routine '${routineId}' step '${step.id}' contains a templateCall. Recursive routine injection is not allowed.`,
56
53
  });
57
54
  }
58
55
  }
59
- const routineGuidance = definition.metaGuidance ?? [];
56
+ const templateId = routineIdToTemplateId(routineId);
60
57
  const expander = (callerId, args) => {
61
58
  const expandedSteps = [];
62
- for (const step of routineSteps) {
63
- const promptResult = substituteArgs(step.prompt, args, routineId, step.id);
59
+ for (const step of definition.steps) {
60
+ if (!step.id || !step.title) {
61
+ return (0, neverthrow_1.err)({
62
+ code: 'TEMPLATE_EXPAND_FAILED',
63
+ templateId,
64
+ message: `Routine '${routineId}' step '${step.id ?? '(missing id)'}' is missing required field '${!step.id ? 'id' : 'title'}'.`,
65
+ });
66
+ }
67
+ if (!step.prompt) {
68
+ return (0, neverthrow_1.err)({
69
+ code: 'TEMPLATE_EXPAND_FAILED',
70
+ templateId,
71
+ message: `Routine '${routineId}' step '${step.id}' is missing required field 'prompt'.`,
72
+ });
73
+ }
74
+ const promptResult = substituteArgs(step.prompt, args, templateId, routineId, step.id);
64
75
  if (promptResult.isErr())
65
76
  return (0, neverthrow_1.err)(promptResult.error);
66
- const mergedGuidance = routineGuidance.length > 0
67
- ? [...(step.guidance ?? []), ...routineGuidance]
68
- : (step.guidance ?? []);
77
+ const titleResult = substituteArgs(step.title, args, templateId, routineId, step.id);
78
+ if (titleResult.isErr())
79
+ return (0, neverthrow_1.err)(titleResult.error);
69
80
  const expandedStep = {
81
+ ...step,
70
82
  id: `${callerId}.${step.id}`,
71
- title: step.title,
83
+ title: titleResult.value,
72
84
  prompt: promptResult.value,
73
- ...(step.agentRole !== undefined && { agentRole: step.agentRole }),
74
- ...(mergedGuidance.length > 0 && { guidance: mergedGuidance }),
75
- ...(step.requireConfirmation !== undefined && { requireConfirmation: step.requireConfirmation }),
85
+ ...(definition.metaGuidance && definition.metaGuidance.length > 0
86
+ ? {
87
+ guidance: [
88
+ ...(step.guidance ?? []),
89
+ ...definition.metaGuidance,
90
+ ],
91
+ }
92
+ : {}),
76
93
  };
77
94
  expandedSteps.push(expandedStep);
78
95
  }
@@ -80,26 +97,18 @@ function createRoutineExpander(routineId, definition) {
80
97
  };
81
98
  return (0, neverthrow_1.ok)(expander);
82
99
  }
83
- function routineIdToTemplateId(routineId) {
84
- const name = routineId.startsWith('routine-') ? routineId.slice('routine-'.length) : routineId;
85
- return `wr.templates.routine.${name}`;
86
- }
87
- const STATIC_TEMPLATE_DEFINITIONS = new Map();
88
- function createTemplateRegistry(routineDefinitions) {
89
- const allTemplates = new Map(STATIC_TEMPLATE_DEFINITIONS);
90
- if (routineDefinitions) {
91
- for (const [routineId, definition] of routineDefinitions) {
92
- const templateId = routineIdToTemplateId(routineId);
93
- const expanderResult = createRoutineExpander(routineId, definition);
94
- if (expanderResult.isOk()) {
95
- allTemplates.set(templateId, expanderResult.value);
96
- }
100
+ const TEMPLATE_DEFINITIONS = new Map();
101
+ function createTemplateRegistry(routineExpanders) {
102
+ const allExpanders = new Map(TEMPLATE_DEFINITIONS);
103
+ if (routineExpanders) {
104
+ for (const [id, expander] of routineExpanders) {
105
+ allExpanders.set(id, expander);
97
106
  }
98
107
  }
99
- const knownIds = [...allTemplates.keys()];
108
+ const knownIds = [...allExpanders.keys()];
100
109
  return {
101
110
  resolve(templateId) {
102
- const expander = allTemplates.get(templateId);
111
+ const expander = allExpanders.get(templateId);
103
112
  if (!expander) {
104
113
  return (0, neverthrow_1.err)({
105
114
  code: 'UNKNOWN_TEMPLATE',
@@ -110,7 +119,7 @@ function createTemplateRegistry(routineDefinitions) {
110
119
  return (0, neverthrow_1.ok)(expander);
111
120
  },
112
121
  has(templateId) {
113
- return allTemplates.has(templateId);
122
+ return allExpanders.has(templateId);
114
123
  },
115
124
  knownIds() {
116
125
  return knownIds;
@@ -2,7 +2,6 @@ import { Workflow, WorkflowStepDefinition, LoopStepDefinition } from '../../type
2
2
  import type { LoopConditionSource } from '../../types/workflow-definition';
3
3
  import type { Result } from 'neverthrow';
4
4
  import { type DomainError } from '../../domain/execution/error';
5
- import { type TemplateRegistry } from './compiler/template-registry';
6
5
  export interface CompiledLoop {
7
6
  readonly loop: LoopStepDefinition;
8
7
  readonly bodySteps: readonly WorkflowStepDefinition[];
@@ -15,11 +14,8 @@ export interface CompiledWorkflow {
15
14
  readonly compiledLoops: ReadonlyMap<string, CompiledLoop>;
16
15
  readonly loopBodyStepIds: ReadonlySet<string>;
17
16
  }
18
- export declare function resolveDefinitionSteps(steps: readonly (WorkflowStepDefinition | LoopStepDefinition)[], features: readonly string[], templateRegistry?: TemplateRegistry): Result<readonly (WorkflowStepDefinition | LoopStepDefinition)[], DomainError>;
17
+ export declare function resolveDefinitionSteps(steps: readonly (WorkflowStepDefinition | LoopStepDefinition)[], features: readonly string[]): Result<readonly (WorkflowStepDefinition | LoopStepDefinition)[], DomainError>;
19
18
  export declare class WorkflowCompiler {
20
- private readonly templateRegistry;
21
- constructor();
22
- static withTemplateRegistry(registry: TemplateRegistry): WorkflowCompiler;
23
19
  compile(workflow: Workflow): Result<CompiledWorkflow, DomainError>;
24
20
  private deriveConditionSource;
25
21
  private resolveLoopBody;
@@ -5,10 +5,6 @@ var __decorate = (this && this.__decorate) || function (decorators, target, key,
5
5
  else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
6
6
  return c > 3 && r && Object.defineProperty(target, key, r), r;
7
7
  };
8
- var __metadata = (this && this.__metadata) || function (k, v) {
9
- if (typeof Reflect === "object" && typeof Reflect.metadata === "function") return Reflect.metadata(k, v);
10
- };
11
- var WorkflowCompiler_1;
12
8
  Object.defineProperty(exports, "__esModule", { value: true });
13
9
  exports.WorkflowCompiler = void 0;
14
10
  exports.resolveDefinitionSteps = resolveDefinitionSteps;
@@ -24,16 +20,47 @@ const resolve_features_1 = require("./compiler/resolve-features");
24
20
  const feature_registry_1 = require("./compiler/feature-registry");
25
21
  const resolve_templates_1 = require("./compiler/resolve-templates");
26
22
  const template_registry_1 = require("./compiler/template-registry");
23
+ const routine_loader_1 = require("./compiler/routine-loader");
27
24
  const _refRegistry = (0, ref_registry_1.createRefRegistry)();
28
25
  const _featureRegistry = (0, feature_registry_1.createFeatureRegistry)();
29
- const _defaultTemplateRegistry = (0, template_registry_1.createTemplateRegistry)();
30
- function resolveDefinitionSteps(steps, features, templateRegistry = _defaultTemplateRegistry) {
31
- const templatesResult = (0, resolve_templates_1.resolveTemplatesPass)(steps, templateRegistry);
26
+ function buildTemplateRegistry() {
27
+ const routineExpanders = new Map();
28
+ const loadResult = (0, routine_loader_1.loadRoutineDefinitions)();
29
+ if (loadResult.isErr()) {
30
+ console.warn(`[WorkflowCompiler] Failed to load routine definitions: ${loadResult.error}`);
31
+ return (0, template_registry_1.createTemplateRegistry)();
32
+ }
33
+ const { routines, warnings } = loadResult.value;
34
+ for (const w of warnings) {
35
+ console.warn(`[WorkflowCompiler] Skipped routine file '${w.file}': ${w.reason}`);
36
+ }
37
+ for (const [routineId, definition] of routines) {
38
+ const expanderResult = (0, template_registry_1.createRoutineExpander)(routineId, definition);
39
+ if (expanderResult.isOk()) {
40
+ routineExpanders.set((0, template_registry_1.routineIdToTemplateId)(routineId), expanderResult.value);
41
+ }
42
+ else {
43
+ console.warn(`[WorkflowCompiler] Failed to create expander for routine '${routineId}': ${expanderResult.error.message}`);
44
+ }
45
+ }
46
+ return (0, template_registry_1.createTemplateRegistry)(routineExpanders.size > 0 ? routineExpanders : undefined);
47
+ }
48
+ let _templateRegistryCache;
49
+ function getTemplateRegistry() {
50
+ if (!_templateRegistryCache) {
51
+ _templateRegistryCache = buildTemplateRegistry();
52
+ }
53
+ return _templateRegistryCache;
54
+ }
55
+ function resolveDefinitionSteps(steps, features) {
56
+ const templatesResult = (0, resolve_templates_1.resolveTemplatesPass)(steps, getTemplateRegistry());
32
57
  if (templatesResult.isErr()) {
33
58
  const e = templatesResult.error;
34
59
  const message = e.code === 'TEMPLATE_RESOLVE_ERROR'
35
60
  ? `Step '${e.stepId}': template error — ${e.cause.message}`
36
- : `Step '${e.stepId}': template expansion error — ${e.cause.message}`;
61
+ : e.code === 'DUPLICATE_STEP_ID'
62
+ ? e.message
63
+ : `Step '${e.stepId}': template expansion error — ${e.cause.message}`;
37
64
  return (0, neverthrow_1.err)(error_1.Err.invalidState(message));
38
65
  }
39
66
  const featuresResult = (0, resolve_features_1.resolveFeaturesPass)(templatesResult.value, features, _featureRegistry);
@@ -59,17 +86,9 @@ function resolveDefinitionSteps(steps, features, templateRegistry = _defaultTemp
59
86
  }
60
87
  return (0, neverthrow_1.ok)(blocksResult.value);
61
88
  }
62
- let WorkflowCompiler = WorkflowCompiler_1 = class WorkflowCompiler {
63
- constructor() {
64
- this.templateRegistry = _defaultTemplateRegistry;
65
- }
66
- static withTemplateRegistry(registry) {
67
- const compiler = new WorkflowCompiler_1();
68
- compiler.templateRegistry = registry;
69
- return compiler;
70
- }
89
+ let WorkflowCompiler = class WorkflowCompiler {
71
90
  compile(workflow) {
72
- const resolvedResult = resolveDefinitionSteps(workflow.definition.steps, workflow.definition.features ?? [], this.templateRegistry);
91
+ const resolvedResult = resolveDefinitionSteps(workflow.definition.steps, workflow.definition.features ?? []);
73
92
  if (resolvedResult.isErr())
74
93
  return (0, neverthrow_1.err)(resolvedResult.error);
75
94
  const steps = resolvedResult.value;
@@ -170,7 +189,6 @@ let WorkflowCompiler = WorkflowCompiler_1 = class WorkflowCompiler {
170
189
  }
171
190
  };
172
191
  exports.WorkflowCompiler = WorkflowCompiler;
173
- exports.WorkflowCompiler = WorkflowCompiler = WorkflowCompiler_1 = __decorate([
174
- (0, tsyringe_1.singleton)(),
175
- __metadata("design:paramtypes", [])
192
+ exports.WorkflowCompiler = WorkflowCompiler = __decorate([
193
+ (0, tsyringe_1.singleton)()
176
194
  ], WorkflowCompiler);
@@ -26,7 +26,6 @@ export declare class FileWorkflowStorage implements IWorkflowStorage {
26
26
  private loadDefinitionFromFile;
27
27
  loadAllWorkflows(): Promise<readonly Workflow[]>;
28
28
  getWorkflowById(id: string): Promise<Workflow | null>;
29
- getRoutineDefinitions(): Promise<ReadonlyMap<string, WorkflowDefinition>>;
30
29
  listWorkflowSummaries(): Promise<readonly WorkflowSummary[]>;
31
30
  save(definition: WorkflowDefinition): Promise<void>;
32
31
  }
@@ -164,16 +164,6 @@ class FileWorkflowStorage {
164
164
  }
165
165
  return workflow;
166
166
  }
167
- async getRoutineDefinitions() {
168
- const index = await this.getWorkflowIndex();
169
- const routines = new Map();
170
- for (const entry of index.values()) {
171
- if (entry.id.startsWith('routine-')) {
172
- routines.set(entry.id, entry.definition);
173
- }
174
- }
175
- return routines;
176
- }
177
167
  async listWorkflowSummaries() {
178
168
  const workflows = await this.loadAllWorkflows();
179
169
  return workflows.map(workflow_1.toWorkflowSummary);
@@ -42,20 +42,28 @@
42
42
  "bytes": 4214
43
43
  },
44
44
  "application/services/compiler/resolve-templates.d.ts": {
45
- "sha256": "623e4b9e7b65c1cbe8a6acee232ba3fa7b11c9394f22a77c7443a02681b0a0a8",
46
- "bytes": 767
45
+ "sha256": "ccb4fbbf00f3ccbfcb9225998bcb9bfbd30a7e64dcfc2b2bc0f06701ee9a8e33",
46
+ "bytes": 905
47
47
  },
48
48
  "application/services/compiler/resolve-templates.js": {
49
- "sha256": "2d7bef8627cd933fa265d9faf9b0d6ced860bd66b39793af7ffa97956ba38a09",
50
- "bytes": 1997
49
+ "sha256": "96f152a9f8b16aa7779b17ac56b555e45ad1213b32607961f060a080c6610a1a",
50
+ "bytes": 3590
51
+ },
52
+ "application/services/compiler/routine-loader.d.ts": {
53
+ "sha256": "831ed318f498c20ef9d547f38aea22a3fa2c677cd923634129545e7bf69ad241",
54
+ "bytes": 481
55
+ },
56
+ "application/services/compiler/routine-loader.js": {
57
+ "sha256": "fa5db5a759d88e63ef65bc877311d41003813d742fbdda7d06bc8abf56007988",
58
+ "bytes": 1849
51
59
  },
52
60
  "application/services/compiler/template-registry.d.ts": {
53
- "sha256": "0945a6c997eccd03f96e1893e513519a78bfb923a832f3d66a2a31c8fdb2a945",
54
- "bytes": 1172
61
+ "sha256": "5c8aef7bd75fce04d56985932eb2ad37e23c8bc87a77c75ee2addf018d3e619d",
62
+ "bytes": 1168
55
63
  },
56
64
  "application/services/compiler/template-registry.js": {
57
- "sha256": "79dab84c14a2adb99b7b1ab2e02986e6fd7b8bed501dfa781e418164763ce0fb",
58
- "bytes": 5147
65
+ "sha256": "92f824723e8d137533f8c752f2f753bb2a88e5812631de0a6428e3c6ff44543a",
66
+ "bytes": 5283
59
67
  },
60
68
  "application/services/enhanced-error-service.d.ts": {
61
69
  "sha256": "b6fe8fad92717f0962f87aa9c0f88277bf28fe2b5e3cfd7875612ee57eb8c684",
@@ -98,12 +106,12 @@
98
106
  "bytes": 32055
99
107
  },
100
108
  "application/services/workflow-compiler.d.ts": {
101
- "sha256": "94ebc79efd351f6f1c29e98e57731c573c291632e148a78e04a13b3d1160dbc9",
102
- "bytes": 1455
109
+ "sha256": "41d0643ae2f07e5ce77a6e02344b5ca5b3c26bde828fbb307528a2ae097ac9d5",
110
+ "bytes": 1211
103
111
  },
104
112
  "application/services/workflow-compiler.js": {
105
- "sha256": "f0c185af822082f605e1d01612987ac9d75ade104a2606621e0fcd8faa899485",
106
- "bytes": 8796
113
+ "sha256": "1adfbce7e79f827b1be530128dd9f4fc65bf427c851d7669fd11f20d4639631f",
114
+ "bytes": 9578
107
115
  },
108
116
  "application/services/workflow-interpreter.d.ts": {
109
117
  "sha256": "56b5b5ad06d42096deba9f0abe7642c18a355a1e598749aab1730df4e9847674",
@@ -546,12 +554,12 @@
546
554
  "bytes": 17854
547
555
  },
548
556
  "infrastructure/storage/file-workflow-storage.d.ts": {
549
- "sha256": "6a10df10ab073c4d3611b42e69e1fe45bdb9d2e437af2c3b6ef90361698d2be5",
550
- "bytes": 1507
557
+ "sha256": "3ddd5c692ebec52dc36cf96013db510a8b0eb77aaeeb7ba38b646a69083f002a",
558
+ "bytes": 1428
551
559
  },
552
560
  "infrastructure/storage/file-workflow-storage.js": {
553
- "sha256": "1db12e1adcac4b119b44fcfbfb1b31e8c15c055fc040c1a82774238e8bfc6e41",
554
- "bytes": 8897
561
+ "sha256": "e5cfd6f8a7fed72961e041aa9c92765c28f4b9872c16e7cbdbd533f0177ea0f0",
562
+ "bytes": 8562
555
563
  },
556
564
  "infrastructure/storage/git-workflow-storage.d.ts": {
557
565
  "sha256": "67d7f10e12c78c674ced83da378cd159465d4b09176d8dfca240864b0d6f38c2",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@exaudeus/workrail",
3
- "version": "3.0.0",
3
+ "version": "3.2.0",
4
4
  "description": "Step-by-step workflow enforcement for AI agents via MCP",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "id": "coding-task-workflow-agentic",
3
3
  "name": "Agentic Task Dev Workflow (Lean • Notes-First • WorkRail Executor)",
4
- "version": "2.1.0",
4
+ "version": "1.0.0",
5
5
  "description": "Lean variant of the agentic coding workflow. Merges triage, inputs gate, context gathering, and re-triage into a single Understand & Classify phase. Reduces context variable count and removes top-level clarificationPrompts. Same quality guarantees with fewer tokens.",
6
6
  "recommendedPreferences": {
7
7
  "recommendedAutonomy": "guided",
@@ -40,13 +40,51 @@
40
40
  }
41
41
  },
42
42
  {
43
- "id": "phase-1-architecture-decision",
44
- "title": "Phase 1: Architecture Decision (Generate, Compare, Challenge, Select)",
43
+ "id": "phase-1a-hypothesis",
44
+ "title": "Phase 1a: State Hypothesis",
45
45
  "runCondition": {
46
46
  "var": "taskComplexity",
47
47
  "not_equals": "Small"
48
48
  },
49
- "prompt": "Design the architecture through deep understanding, not surface-level generation.\n\nPart A — Understand the problem deeply:\n- What are the core tensions in this problem? (e.g., performance vs simplicity, flexibility vs type safety, backward compatibility vs clean design)\n- How does the codebase already solve similar problems? Study the most relevant existing patterns — don't just list files, analyze the architectural decisions and constraints they protect.\n- What's the simplest naive solution? Why is it insufficient? (If it IS sufficient, that's your leading candidate — the burden of proof is on complexity.)\n- What makes this problem hard? What would a junior developer miss?\n\nPart B — Identify tensions and constraints (including philosophy):\n- Extract 2-4 real tradeoffs from your understanding (not generic labels like 'simplicity' or 'maintainability')\n- These tensions drive candidate generation — each candidate resolves them differently\n- Filter `philosophySources` to the principles actually under pressure for THIS problem. Which of the dev's philosophy principles constrain the solution space? For example: does the simplest solution require mutable state when the dev prefers immutability? Does the existing pattern use exceptions when the dev prefers Result types? Would the cleanest approach violate their preference for small interfaces?\n- If `philosophyConflicts` exist for this area of the codebase, surface them as explicit tensions the design must resolve: follow the stated rule, follow the existing pattern, or reconcile them\n\nPart C — State your hypothesis before delegating:\nBefore spawning any subagents, write 2-3 sentences: what do you currently believe the best approach is, and what concerns you most about it? This is your reference point for interrogating subagent output later.\n\nPart D — Generate candidates from tensions:\n- QUICK: self-generate candidates from your tensions. Include mandatory candidates: (1) simplest possible change that satisfies acceptance criteria, (2) follow existing repo pattern.\n- STANDARD: spawn ONE WorkRail Executor running `routine-tension-driven-design` with your tensions, philosophy sources, and problem understanding as input. Simultaneously, spawn ONE WorkRail Executor running `routine-hypothesis-challenge`: 'What constraints or failure modes would make you choose a fundamentally different approach? Propose one, grounded in real reasons.'\n- THOROUGH: spawn ONE WorkRail Executor running `routine-tension-driven-design`, ONE running `routine-hypothesis-challenge` (adversarial divergence), and ONE running `routine-execution-simulation`: 'Trace through the leading approach's 3 most likely failure scenarios step by step.'\n- For STANDARD with riskLevel=High: also spawn the execution simulation subagent.\n\nThe main agent ALWAYS self-generates its own candidates too (at minimum the two mandatory ones: simplest change + existing pattern). Subagent candidates supplement, not replace, your own thinking.\n\nPart E — Interrogate subagent output (if subagents were used):\nDo NOT summarize subagent findings as your own. Instead, interrogate against your hypothesis:\n- Where do subagent findings challenge your hypothesis? Are they right or did they miss context?\n- What did they surface that you genuinely hadn't considered?\n- Where are they just restating the obvious or echoing each other?\n- What did they get wrong or overweight?\nState explicitly: what you changed your mind about and why, or what you held firm on despite their input and why.\n\nPart F — Compare via tradeoffs (not checklists):\nFor each surviving candidate, produce:\n- One-sentence summary of the approach\n- Which tensions it resolves and which it accepts\n- The specific failure mode you'd watch for\n- How it relates to existing repo patterns (follows / adapts / departs)\n- What you gain and what you give up\n- Which of the dev's philosophy principles it honors and which it conflicts with — be specific (principle name + how)\n\nPart G — Challenge the leading option:\n- STANDARD: optionally challenge with ONE WorkRail Executor running `routine-hypothesis-challenge`\n- THOROUGH: challenge top 1-2 candidates using ONE or TWO WorkRail Executors running `routine-hypothesis-challenge`\n\nPart H — Select:\nSet context variables:\n- `selectedApproach` — the chosen design with rationale tied back to tensions\n- `runnerUpApproach` — the next-best option and why it lost\n- `architectureRationale` — which tensions were resolved and which were accepted\n- `pivotTriggers` — specific conditions under which you'd switch to the runner-up\n- `keyRiskToMonitor` — the failure mode of the selected approach\n- `acceptedTradeoffs` — what the selected approach gives up (feeds directly into design review)\n- `identifiedFailureModes` — per-candidate failure modes (feeds directly into design review)\n\nRules:\n- the main agent owns the final decision; subagents contribute depth, not decisions\n- if the simplest solution satisfies acceptance criteria, prefer it — complexity must justify itself\n- if the challenged leading candidate no longer looks best, switch deliberately rather than defending sunk cost\n- subagents go deep on specific questions, not wide on generic plans",
49
+ "prompt": "Before any design work, state your current hypothesis in 3-5 sentences.\n\nBased on what you learned in Phase 0, write:\n1. Your current best guess for the approach\n2. Your main concern about that guess\n3. What would most likely make that guess wrong\n\nThis is your reference point. After design generation, you will compare the result against this hypothesis and say what changed your mind or what held firm.\n\nSet context variable:\n- `initialHypothesis`",
50
+ "requireConfirmation": false
51
+ },
52
+ {
53
+ "id": "phase-1b-design-quick",
54
+ "title": "Phase 1b: Lightweight Design (QUICK)",
55
+ "runCondition": {
56
+ "and": [
57
+ { "var": "taskComplexity", "not_equals": "Small" },
58
+ { "var": "rigorMode", "equals": "QUICK" }
59
+ ]
60
+ },
61
+ "prompt": "Generate a lightweight design inline. QUICK rigor means the path is clear and risk is low.\n\nProduce two mandatory candidates:\n1. The simplest possible change that satisfies acceptance criteria\n2. Follow the existing repo pattern for this kind of change\n\nFor each candidate:\n- One-sentence summary\n- Which tensions it resolves and which it accepts\n- How it relates to existing repo patterns (follows / adapts / departs)\n- Failure mode to watch\n- Philosophy fit (name specific principles)\n\nCompare and recommend. If both converge on the same approach, say so honestly.\n\nWrite the output to `design-candidates.md` with this structure:\n- Problem Understanding (core tensions, what makes it hard)\n- Philosophy Constraints (which principles matter for this problem)\n- Candidates (each with: summary, tensions resolved/accepted, failure mode, philosophy fit)\n- Comparison and Recommendation\n- Open Questions (if any remain)",
62
+ "requireConfirmation": false
63
+ },
64
+ {
65
+ "id": "phase-1b-design-deep",
66
+ "title": "Phase 1b: Design Generation (Injected Routine — Tension-Driven Design)",
67
+ "runCondition": {
68
+ "and": [
69
+ { "var": "taskComplexity", "not_equals": "Small" },
70
+ { "var": "rigorMode", "not_equals": "QUICK" }
71
+ ]
72
+ },
73
+ "templateCall": {
74
+ "templateId": "wr.templates.routine.tension-driven-design",
75
+ "args": {
76
+ "deliverableName": "design-candidates.md"
77
+ }
78
+ }
79
+ },
80
+ {
81
+ "id": "phase-1c-challenge-and-select",
82
+ "title": "Phase 1c: Challenge and Select",
83
+ "runCondition": {
84
+ "var": "taskComplexity",
85
+ "not_equals": "Small"
86
+ },
87
+ "prompt": "Read `design-candidates.md`, compare against your initial hypothesis, and make the final architecture decision.\n\nInput contract: both QUICK and deep design paths produce `design-candidates.md` with candidates, tradeoffs, and a recommendation. Use that artifact as your primary input.\n\nPart A — Compare to hypothesis:\nRevisit `initialHypothesis`. Now that you have design candidates:\n- Where did the design work confirm your hypothesis?\n- Where did it challenge or change your thinking?\n- What did you learn that you hadn't considered?\nState explicitly what changed your mind and what held firm.\n\nPart B — Challenge the leading option:\n- What's the strongest argument against the recommended approach?\n- What assumption, if wrong, would invalidate it?\n- STANDARD/THOROUGH: optionally spawn ONE WorkRail Executor running `routine-hypothesis-challenge` focused on the leading option's failure modes\n- THOROUGH: optionally also spawn ONE WorkRail Executor running `routine-execution-simulation` to trace the 3 most likely failure scenarios\n\nPart C — Select:\nMake the final architecture decision. The design output is evidence, not a decision — you own the choice.\n\nIf the simplest solution satisfies acceptance criteria, prefer it. Complexity must justify itself. If the challenged leading candidate no longer looks best, switch deliberately rather than defending sunk cost.\n\nSet context variables:\n- `selectedApproach` — the chosen design with rationale tied back to tensions\n- `runnerUpApproach` — the next-best option and why it lost\n- `architectureRationale` — which tensions were resolved and which were accepted\n- `pivotTriggers` — specific conditions under which you'd switch to the runner-up\n- `keyRiskToMonitor` — the failure mode of the selected approach\n- `acceptedTradeoffs` — what the selected approach gives up (feeds directly into design review)\n- `identifiedFailureModes` — per-candidate failure modes (feeds directly into design review)",
50
88
  "requireConfirmation": {
51
89
  "or": [
52
90
  { "var": "automationLevel", "equals": "Low" },
@@ -74,13 +112,30 @@
74
112
  },
75
113
  "body": [
76
114
  {
77
- "id": "phase-2a-review-design",
78
- "title": "Review Design for Gaps, Issues, and Improvements",
79
- "prompt": "Review the selected architecture using the explicit tradeoffs and failure modes from Phase 1 as your review criteria — not a generic gaps checklist.\n\nTargeted review (derived from Phase 1 outputs):\n1. Are the `acceptedTradeoffs` actually acceptable? For each accepted tradeoff, verify it won't violate acceptance criteria or invariants under realistic conditions.\n2. Are the `identifiedFailureModes` actually handled? For each failure mode, trace through the design and confirm there's a mitigation path. If not, flag it.\n3. Does the selected approach's relationship to existing repo patterns hold up? If it 'adapts' an existing pattern, verify the adaptation doesn't break the invariants the original pattern protects.\n4. Is there a simpler version of the selected approach that still satisfies acceptance criteria? (Complexity must continue to justify itself.)\n\nCompare against the runner-up:\n- Are there elements from the runner-up that would strengthen the selected approach without adding complexity?\n- Would a hybrid resolve an accepted tradeoff that's bothering you?\n\nPhilosophy alignment: does the architecture respect the user's active coding rules?\n\nBefore delegating, state your current assessment: what do you think the strongest and weakest parts of the design are right now?\n\nMode-adaptive delegation:\n- QUICK: self-review only\n- STANDARD: optionally spawn ONE WorkRail Executor running `routine-hypothesis-challenge` focused specifically on the accepted tradeoffs and failure modes\n- THOROUGH: spawn TWO WorkRail Executors — `routine-hypothesis-challenge` on tradeoffs + `routine-execution-simulation` on failure modes\n\nAfter receiving subagent output (if used), interrogate it against your pre-assessment. Do not adopt their framing wholesale. State what changed your thinking and what didn't.\n\nIf issues are found, fix the design (update `selectedApproach`, `architectureRationale`, `pivotTriggers`, `acceptedTradeoffs`, `identifiedFailureModes`) before continuing.\n\nSet context variables:\n- `designFindings`\n- `designRevised`",
115
+ "id": "phase-2a-pre-assess-design-review",
116
+ "title": "Pre-Assess Design Review",
117
+ "prompt": "Before the detailed design review, state your current assessment in 2-4 sentences.\n\nSay:\n- what you think the strongest part of the selected design is right now\n- what you think the weakest part is right now\n- which tradeoff or failure mode worries you most\n\nThis is your reference point for interpreting the review findings.\n\nSet context variable:\n- `designReviewAssessment`",
118
+ "requireConfirmation": false
119
+ },
120
+ {
121
+ "id": "phase-2b-design-review-core",
122
+ "title": "Design Review Core",
123
+ "templateCall": {
124
+ "templateId": "wr.templates.routine.design-review",
125
+ "args": {
126
+ "deliverableName": "design-review-findings.md"
127
+ }
128
+ },
80
129
  "requireConfirmation": false
81
130
  },
82
131
  {
83
- "id": "phase-2b-loop-decision",
132
+ "id": "phase-2c-synthesize-design-review",
133
+ "title": "Synthesize Design Review Findings",
134
+ "prompt": "Read `design-review-findings.md` and synthesize the review into workflow-owned decisions.\n\nPart A — Compare against your pre-assessment:\nRevisit `designReviewAssessment`.\n- What did the review confirm?\n- What did it surface that you missed?\n- What changed your mind and what held firm?\n\nPart B — Optional mode-adaptive challenge around the review findings:\n- QUICK: self-synthesize only\n- STANDARD: optionally spawn ONE WorkRail Executor running `routine-hypothesis-challenge` focused on the most serious review finding\n- THOROUGH: optionally spawn TWO WorkRail Executors — `routine-hypothesis-challenge` on the most serious finding + `routine-execution-simulation` on the most dangerous failure mode\n\nPart C — Decide:\nInterpret the findings yourself. Do not adopt the review artifact or any subagent framing wholesale.\n\nIf issues are found, fix the design (update `selectedApproach`, `architectureRationale`, `pivotTriggers`, `acceptedTradeoffs`, `identifiedFailureModes`) before continuing.\n\nSet context variables:\n- `designFindings`\n- `designRevised`",
135
+ "requireConfirmation": false
136
+ },
137
+ {
138
+ "id": "phase-2d-loop-decision",
84
139
  "title": "Design Review Loop Decision",
85
140
  "prompt": "Provide a loop control artifact.\n\nDecision rules:\n- if `designFindings` is non-empty and design was revised -> continue (verify the revision)\n- if `designFindings` is empty -> stop\n- if max iterations reached -> stop and document remaining concerns\n\nOutput exactly:\n```json\n{\n \"artifacts\": [{\n \"kind\": \"wr.loop_control\",\n \"decision\": \"continue\"\n }]\n}\n```",
86
141
  "requireConfirmation": false,
@@ -204,13 +259,24 @@
204
259
  },
205
260
  "body": [
206
261
  {
207
- "id": "phase-7a-verify-and-fix",
208
- "title": "Verify Integration and Fix Issues",
209
- "prompt": "Perform integration verification across all implemented slices.\n\nRequired:\n- verify acceptance criteria\n- map invariants to concrete proof (tests, build results, explicit reasoning)\n- run whole-task validation commands\n- identify any invariant violations or regressions\n- confirm the implemented result aligns with the user's coding philosophy, naming any tensions explicitly\n- review cumulative drift across all slices\n- check whether repeated small compromises added up to a larger pattern problem\n\nIf issues are found, fix them immediately:\n- apply code fixes\n- re-run affected tests\n- update `implementation_plan.md` if the fix changed boundaries or approach\n\nSet context variables:\n- `integrationFindings`\n- `integrationPassed`\n- `regressionDetected`",
262
+ "id": "phase-7a-final-verification-core",
263
+ "title": "Final Verification Core",
264
+ "templateCall": {
265
+ "templateId": "wr.templates.routine.final-verification",
266
+ "args": {
267
+ "deliverableName": "final-verification-findings.md"
268
+ }
269
+ },
270
+ "requireConfirmation": false
271
+ },
272
+ {
273
+ "id": "phase-7b-fix-and-summarize",
274
+ "title": "Fix Issues and Summarize Verification",
275
+ "prompt": "Read `final-verification-findings.md` and turn it into workflow-owned decisions and fixes.\n\nRequired:\n- interpret the findings yourself rather than rubber-stamping them\n- identify any invariant violations or regressions that must be fixed now\n- if issues are found, fix them immediately\n- re-run affected tests\n- update `implementation_plan.md` if the fix changed boundaries or approach\n\nSet context variables:\n- `integrationFindings`\n- `integrationPassed`\n- `regressionDetected`",
210
276
  "requireConfirmation": false
211
277
  },
212
278
  {
213
- "id": "phase-7b-loop-decision",
279
+ "id": "phase-7c-loop-decision",
214
280
  "title": "Final Verification Loop Decision",
215
281
  "prompt": "Provide a loop control artifact.\n\nDecision rules:\n- if `integrationFindings` is non-empty and fixes were applied -> continue (re-verify the fixes)\n- if `integrationFindings` is empty or all issues resolved -> stop and produce handoff\n- if max iterations reached -> stop and document remaining concerns\n\nWhen stopping, include the handoff summary:\n- acceptance criteria status\n- invariant status\n- test/build summary\n- concise PR/MR description draft (why, test plan, rollout notes)\n- follow-up tickets\n- any philosophy tensions accepted intentionally and why\n\nKeep the handoff concise and executive-level. Do not auto-merge or push unless the user explicitly asks.\n\nOutput exactly:\n```json\n{\n \"artifacts\": [{\n \"kind\": \"wr.loop_control\",\n \"decision\": \"continue\"\n }]\n}\n```",
216
282
  "requireConfirmation": true,
@@ -0,0 +1,28 @@
1
+ {
2
+ "id": "routine-injection-example",
3
+ "name": "Routine Injection Example — Design with Tension-Driven Design",
4
+ "version": "1.0.0",
5
+ "description": "Example workflow that uses templateCall to inject the tension-driven-design routine inline. Validates the end-to-end routine injection path.",
6
+ "steps": [
7
+ {
8
+ "id": "phase-0-gather",
9
+ "title": "Phase 0: Gather Context",
10
+ "prompt": "Gather context about the problem space."
11
+ },
12
+ {
13
+ "id": "phase-1-design",
14
+ "title": "Phase 1: Design (Injected Routine)",
15
+ "templateCall": {
16
+ "templateId": "wr.templates.routine.tension-driven-design",
17
+ "args": {
18
+ "deliverableName": "design-candidates.md"
19
+ }
20
+ }
21
+ },
22
+ {
23
+ "id": "phase-2-implement",
24
+ "title": "Phase 2: Implement",
25
+ "prompt": "Implement the selected design."
26
+ }
27
+ ]
28
+ }
@@ -0,0 +1,60 @@
1
+ {
2
+ "id": "routine-design-review",
3
+ "name": "Design Review Routine",
4
+ "version": "1.0.0",
5
+ "description": "Reviews a selected design using explicit tradeoffs, failure modes, simpler-alternative checks, runner-up comparison, and philosophy alignment. Produces a reusable design-review findings artifact.",
6
+ "clarificationPrompts": [
7
+ "What design artifact or summary should I review?",
8
+ "What tradeoffs, failure modes, and runner-up option are available?",
9
+ "What artifact name should I produce?"
10
+ ],
11
+ "preconditions": [
12
+ "A selected design or design summary is available",
13
+ "Accepted tradeoffs and failure modes are available",
14
+ "A runner-up or alternative approach is available",
15
+ "The dev's philosophy or rules are available"
16
+ ],
17
+ "metaGuidance": [
18
+ "PURPOSE: review the quality of a selected design, not generate a fresh design from scratch.",
19
+ "ROLE: you are a reviewer looking for real gaps, not generic criticism.",
20
+ "PHILOSOPHY: name tensions by principle when they matter.",
21
+ "SIMPLICITY: always ask whether a simpler version would still satisfy acceptance criteria."
22
+ ],
23
+ "steps": [
24
+ {
25
+ "id": "step-review-tradeoffs",
26
+ "title": "Step 1: Review Accepted Tradeoffs",
27
+ "prompt": "Review the selected design by walking through the accepted tradeoffs explicitly.\n\nFor each accepted tradeoff:\n- Verify it will not violate acceptance criteria or invariants under realistic conditions\n- Identify what would make the tradeoff no longer acceptable\n- Note any hidden assumptions\n\nWorking notes:\n- Tradeoff review\n- Hidden assumptions\n- Conditions under which the tradeoff fails",
28
+ "agentRole": "You are a reviewer validating whether accepted tradeoffs are actually acceptable.",
29
+ "requireConfirmation": false
30
+ },
31
+ {
32
+ "id": "step-review-failure-modes",
33
+ "title": "Step 2: Review Failure Modes",
34
+ "prompt": "Review the identified failure modes for the selected design.\n\nFor each failure mode:\n- Trace whether the design handles it adequately\n- Identify missing mitigations\n- Note which failure mode is most dangerous if it occurs\n\nWorking notes:\n- Failure mode coverage\n- Missing mitigations\n- Highest-risk failure mode",
35
+ "agentRole": "You are a failure analyst checking whether the design can survive realistic problems.",
36
+ "requireConfirmation": false
37
+ },
38
+ {
39
+ "id": "step-compare-runner-up",
40
+ "title": "Step 3: Compare Against Runner-Up and Simpler Alternatives",
41
+ "prompt": "Compare the selected design against the runner-up and a simpler possible variant.\n\nReview:\n- Whether the runner-up has elements worth pulling into the selected design\n- Whether a hybrid would resolve an uncomfortable tradeoff without adding much complexity\n- Whether a simpler version of the selected design would still satisfy acceptance criteria\n\nWorking notes:\n- Runner-up strengths worth borrowing\n- Simpler alternative analysis\n- Hybrid opportunities",
42
+ "agentRole": "You are comparing options honestly rather than defending the current favorite.",
43
+ "requireConfirmation": false
44
+ },
45
+ {
46
+ "id": "step-review-philosophy",
47
+ "title": "Step 4: Review Philosophy Alignment",
48
+ "prompt": "Review the selected design against the dev's philosophy.\n\nName the principles that matter for this design and assess:\n- Which principles are satisfied clearly\n- Which principles are under tension\n- Which tensions are acceptable versus risky\n\nWorking notes:\n- Relevant principles\n- Satisfied principles\n- Tensions and why they matter",
49
+ "agentRole": "You are checking whether the design respects what the dev actually values.",
50
+ "requireConfirmation": false
51
+ },
52
+ {
53
+ "id": "step-deliver",
54
+ "title": "Step 5: Deliver Design Review Findings",
55
+ "prompt": "Create `{deliverableName}`.\n\nRequired structure:\n- Tradeoff Review\n- Failure Mode Review\n- Runner-Up / Simpler Alternative Review\n- Philosophy Alignment\n- Findings (Red / Orange / Yellow or equivalent severity)\n- Recommended Revisions\n- Residual Concerns\n\nOptimize for concise, actionable findings that the main workflow can interpret and decide on.",
56
+ "agentRole": "You are delivering a review artifact for the main workflow to synthesize and act on.",
57
+ "requireConfirmation": false
58
+ }
59
+ ]
60
+ }
@@ -0,0 +1,62 @@
1
+ {
2
+ "id": "routine-final-verification",
3
+ "name": "Final Verification Routine",
4
+ "version": "1.0.0",
5
+ "description": "Performs reusable final verification over acceptance criteria, invariants, validation evidence, regressions, cumulative drift, and philosophy alignment. Produces a proof-oriented verification artifact built around claim -> evidence -> gap -> severity -> readiness verdict.",
6
+ "clarificationPrompts": [
7
+ "What implementation or slices should I verify?",
8
+ "What acceptance criteria and invariants must hold?",
9
+ "What validation commands or evidence are available?",
10
+ "What artifact name should I produce?"
11
+ ],
12
+ "preconditions": [
13
+ "Implementation is available for review",
14
+ "Acceptance criteria are available",
15
+ "Invariants are available",
16
+ "A deterministic validation path exists"
17
+ ],
18
+ "metaGuidance": [
19
+ "PURPOSE: verify whether the whole task is truly done, not just locally green.",
20
+ "ROLE: you are a verifier proving or disproving readiness using evidence.",
21
+ "PROOF: map every readiness claim to tests, build output, artifacts, or explicit reasoning.",
22
+ "SEVERITY: every gap should be classified clearly so the caller knows what blocks shipping.",
23
+ "DRIFT: look for cumulative compromise, not just isolated defects."
24
+ ],
25
+ "steps": [
26
+ {
27
+ "id": "step-map-claims-to-proof",
28
+ "title": "Step 1: Map Acceptance Criteria and Invariants to Proof",
29
+ "prompt": "Map the implementation's readiness claims to concrete proof.\n\nFor each acceptance criterion and invariant:\n- state the claim clearly\n- identify the strongest supporting evidence (test, build output, artifact, code reasoning)\n- note whether the proof is strong, partial, or missing\n- record any gap that prevents the claim from being fully proven\n\nWorking notes:\n- Claim -> proof matrix\n- Strong / partial / missing proof\n- Gaps that weaken readiness",
30
+ "agentRole": "You are a verifier mapping claims to concrete proof.",
31
+ "requireConfirmation": false
32
+ },
33
+ {
34
+ "id": "step-review-validation-evidence",
35
+ "title": "Step 2: Review Validation Evidence Quality",
36
+ "prompt": "Review the overall validation evidence quality.\n\nCheck:\n- whether the right validation commands were run\n- whether the evidence is trustworthy and sufficient for readiness\n- whether any critical area has only weak or indirect proof\n- whether additional validation would materially change confidence\n\nWorking notes:\n- Validation commands reviewed\n- Evidence strength assessment\n- Missing or weak proof",
37
+ "agentRole": "You are checking whether the validation story is actually strong enough to trust.",
38
+ "requireConfirmation": false
39
+ },
40
+ {
41
+ "id": "step-classify-gaps-and-regressions",
42
+ "title": "Step 3: Classify Gaps, Regressions, and Drift by Severity",
43
+ "prompt": "Review the implementation for regressions, drift, and unresolved gaps.\n\nCheck:\n- invariant violations or regressions\n- whether repeated small compromises added up to a larger pattern problem\n- whether the implementation still matches intended plan boundaries\n- whether any proof gaps should block shipping versus merely lower confidence\n\nClassify each issue by severity:\n- Red: blocks readiness\n- Orange: should be fixed before shipping if possible\n- Yellow: acceptable tension or bounded follow-up\n\nWorking notes:\n- Regressions found\n- Drift assessment\n- Severity-classified gaps",
44
+ "agentRole": "You are looking for the subtle ways a task can go wrong even when individual slices seemed fine.",
45
+ "requireConfirmation": false
46
+ },
47
+ {
48
+ "id": "step-review-philosophy",
49
+ "title": "Step 4: Review Philosophy Alignment",
50
+ "prompt": "Review the final result against the dev's philosophy.\n\nAssess:\n- which principles are clearly satisfied\n- which tensions remain intentionally accepted\n- which philosophy violations should be severity Red, Orange, or Yellow\n- whether any philosophy concern changes the readiness verdict\n\nWorking notes:\n- Satisfied principles\n- Accepted tensions\n- Severity-classified philosophy concerns",
51
+ "agentRole": "You are checking whether the finished result still reflects the dev's standards.",
52
+ "requireConfirmation": false
53
+ },
54
+ {
55
+ "id": "step-deliver",
56
+ "title": "Step 5: Deliver Final Verification Findings",
57
+ "prompt": "Create `{deliverableName}`.\n\nRequired structure:\n- Readiness Claims and Proof Matrix\n - claim\n - supporting evidence\n - proof strength (strong / partial / missing)\n - proof gap\n- Validation Evidence Summary\n- Severity-Classified Gaps\n - Red (blocking)\n - Orange (should fix)\n - Yellow (accepted tension / follow-up)\n- Regression / Drift Review\n- Philosophy Alignment\n- Recommended Fixes\n- Readiness Verdict\n - Ready\n - Ready with Accepted Tensions\n - Not Ready\n\nOptimize for a compact artifact the main workflow can use to decide whether to fix, re-verify, or hand off.",
58
+ "agentRole": "You are delivering a verification artifact the main workflow can interpret and act on.",
59
+ "requireConfirmation": false
60
+ }
61
+ ]
62
+ }
@@ -1,113 +1,69 @@
1
1
  {
2
2
  "id": "routine-hypothesis-challenge",
3
3
  "name": "Hypothesis Challenge Routine",
4
- "version": "1.1.0",
5
- "description": "Adversarial testing of hypotheses using an Ideate -> Plan -> Execute strategy. Configurable rigor levels (1-5) allow for progressively deeper skepticism and stress testing.",
4
+ "version": "1.0.0",
5
+ "description": "Lean adversarial review of a hypothesis, recommendation, or diagnosis. Produces the strongest counter-argument, exposes weak assumptions and evidence gaps, identifies likely failure modes, and defines the critical tests needed to keep, revise, or reject the current claim.",
6
6
  "clarificationPrompts": [
7
- "What hypotheses or assumptions should I challenge?",
8
- "What rigor level do you need? (1=Surface, 3=Deep, 5=Maximum)",
9
- "What evidence supports these hypotheses?",
10
- "What context should I consider? (bug description, findings, constraints)"
7
+ "What hypothesis, recommendation, or diagnosis should I challenge?",
8
+ "What evidence currently supports it?",
9
+ "What depth do you need? (QUICK / STANDARD / THOROUGH; legacy rigor=1/3/5 is still accepted for compatibility)",
10
+ "What artifact name should I produce?"
11
11
  ],
12
12
  "preconditions": [
13
- "Hypotheses or assumptions are clearly stated",
14
- "Rigor level (1-5) is specified",
15
- "Supporting evidence is available",
16
- "Agent has read access to relevant context"
13
+ "A target hypothesis, recommendation, or diagnosis is available",
14
+ "Supporting evidence or reasoning is available",
15
+ "Relevant context is available for challenge"
17
16
  ],
18
17
  "metaGuidance": [
19
- "**ROUTINE PURPOSE:**",
20
- "This routine performs adversarial testing. It separates strategy (Ideating attack vectors) from execution (Challenging/Stress Testing).",
21
- "**PHASES:**",
22
- "1. IDEATE: Brainstorm attack vectors and common flaws",
23
- "2. STRATEGIZE: Define the challenge plan for the requested rigor",
24
- "3. EXECUTE: Run the challenge (Surface/Deep/Max)",
25
- "4. SYNTHESIZE: Deliver verdicts and alternative explanations",
26
- "**CORE PRINCIPLES:**",
27
- "- ADVERSARIAL: Actively try to disprove, don't confirm",
28
- "- SYSTEMATIC: Challenge assumptions, logic, and evidence",
29
- "- CONSTRUCTIVE: Goal is to strengthen truth, not just destroy"
18
+ "PURPOSE: strengthen truth by trying to break the current story.",
19
+ "ROLE: you are an adversarial reviewer, not a neutral summarizer.",
20
+ "SCOPE: challenge assumptions, evidence quality, and likely failure modes.",
21
+ "DISCIPLINE: produce concrete counter-arguments and critical tests, not vague skepticism.",
22
+ "DEPTH: QUICK = strongest counter only; STANDARD = counter + failure-mode review; THOROUGH = add alternative explanations and sharper discrimination tests.",
23
+ "COMPATIBILITY: prefer depth language (QUICK / STANDARD / THOROUGH). Treat legacy rigor values as adapter input, not the primary model."
30
24
  ],
31
25
  "steps": [
32
26
  {
33
- "id": "step-0-ideate-vectors",
34
- "title": "Step 0: Ideate Challenge Strategy",
35
- "prompt": "**IDEATE ATTACK VECTORS**\n\nBefore diving into specific hypotheses, step back and look at the whole picture.\n\n**YOUR MISSION:** Brainstorm ways to break these hypotheses.\n\n**EXECUTE:**\n1. Review all hypotheses together\n2. Identify shared assumptions (do they all assume X?)\n3. Brainstorm classes of failure (Concurrency? State? Logic?)\n4. Identify weak points in the evidence provided\n\n**REFLECT:**\n- Are these hypotheses too similar?\n- Are they missing a whole category of explanation?\n- What is the most likely \"unknown unknown\"?\n\n**WORKING NOTES:**\n- Common Assumptions\n- Potential Attack Vectors (e.g., Race Conditions, Edge Cases)\n- Evidence Weaknesses",
36
- "agentRole": "You are a red-team strategist planning your attack.",
37
- "requireConfirmation": false,
38
- "guidance": [
39
- "BRAINSTORM: Look for systemic issues first",
40
- "CATEGORIES: Think in categories (Logic, Data, Timing, Environment)",
41
- "SKEPTICISM: Assume the hypotheses are wrong. Why?"
42
- ]
27
+ "id": "step-load-target",
28
+ "title": "Step 1: Load the Target Claim and Evidence",
29
+ "prompt": "Load the current claim you are challenging.\n\nCapture:\n- the target claim in one sentence\n- the main assumptions it depends on\n- the strongest supporting evidence currently available\n- what result would count as meaningful disproof\n\nKeep this step compact and precise. The goal is to define exactly what is under challenge and what would falsify it.",
30
+ "agentRole": "You are defining exactly what claim is on trial and what would falsify it.",
31
+ "requireConfirmation": false
43
32
  },
44
33
  {
45
- "id": "step-1-plan-challenge",
46
- "title": "Step 1: Plan Challenge Tactics",
47
- "prompt": "**DEFINE CHALLENGE PLAN**\n\nNow define your specific tactics for the requested rigor level.\n\n**YOUR MISSION:** Create a concrete plan to test these hypotheses.\n\n**EXECUTE:**\n1. Map attack vectors to specific hypotheses\n2. Define **Key Questions** to answer\n3. Select **Stress Tests** or **Counter-Examples** to search for\n4. Define criteria for \"disproof\"\n\n**DELIVERABLE:**\nCreate `challenge-strategy.md`:\n- Attack Plan for each Hypothesis\n- Key Assumptions to Probe\n- Required Evidence check",
48
- "agentRole": "You are a lead auditor defining the scope of the audit.",
49
- "requireConfirmation": false,
50
- "guidance": [
51
- "TACTICS: Be specific (e.g., \"Check for null user in auth flow\")",
52
- "CRITERIA: What would convince you the hypothesis is false?"
53
- ]
34
+ "id": "step-break-claim",
35
+ "title": "Step 2: Find the Strongest Counter-Argument",
36
+ "prompt": "Find the strongest case against the current claim.\n\nChallenge it by asking:\n- What is the strongest counter-argument or competing explanation?\n- What evidence could be interpreted differently?\n- What hidden assumption is carrying too much weight?\n- What would a sharp skeptic say first?\n\nOptimize for the single strongest attack, not a long list of weak objections.",
37
+ "agentRole": "You are a sharp skeptic trying to overturn the current favorite with the strongest available attack.",
38
+ "requireConfirmation": false
54
39
  },
55
40
  {
56
- "id": "step-execute-rigor-1",
57
- "title": "Execution: Rigor 1 (Surface)",
58
- "runCondition": {
59
- "var": "rigor",
60
- "gte": 1
61
- },
62
- "prompt": "**EXECUTE RIGOR 1: SURFACE CHALLENGE**\n\nExecute your challenge plan at Rigor 1 (Surface).\n\n**MISSION:** Identify obvious flaws and simple counter-examples.\n\n**EXECUTE:**\n1. Follow `challenge-strategy.md`\n2. Check for obvious logical gaps\n3. Identify simple counter-examples\n4. Check for Occam's Razor alternatives\n\n**WORKING NOTES:**\n- Obvious Flaws\n- Simple Counter-Examples\n- Better Alternatives",
63
- "agentRole": "You are a skeptical reviewer looking for quick wins.",
64
- "requireConfirmation": false,
65
- "guidance": [
66
- "FOCUS: Obvious errors, simple logic gaps",
67
- "SPEED: Don't dig deep yet, look for low-hanging fruit"
68
- ]
69
- },
70
- {
71
- "id": "step-execute-rigor-3",
72
- "title": "Execution: Rigor 3 (Deep)",
73
- "runCondition": {
74
- "var": "rigor",
75
- "gte": 3
76
- },
77
- "prompt": "**EXECUTE RIGOR 3: DEEP CHALLENGE**\n\nExecute your challenge plan at Rigor 3 (Deep Analysis).\n\n**MISSION:** Deeply challenge with edge cases and hidden assumptions.\n\n**EXECUTE:**\n1. Follow `challenge-strategy.md`\n2. Expose hidden assumptions\n3. Generate systematic edge cases\n4. Analyze timing and environment factors\n\n**WORKING NOTES:**\n- Hidden Assumptions Exposed\n- Edge Case Analysis\n- Environmental Factors",
78
- "agentRole": "You are a rigorous auditor digging for structural flaws.",
79
- "requireConfirmation": false,
80
- "guidance": [
81
- "FOCUS: Unstated assumptions, boundary conditions",
82
- "DEPTH: trace logic chains to find breaks"
83
- ]
41
+ "id": "step-review-failure-modes",
42
+ "title": "Step 3: Review Weak Evidence and Likely Failure Modes",
43
+ "prompt": "Probe where the current claim could fail under realistic pressure.\n\nReview:\n- the weakest part of the evidence chain\n- the most likely failure modes if the claim is wrong\n- edge cases or environmental factors that could invalidate the conclusion\n- contradictions, unexplained facts, or missing proof\n\nFocus on the few things most likely to flip the conclusion rather than exhaustive enumeration.",
44
+ "agentRole": "You are testing whether the current story survives realistic pressure and real evidence quality.",
45
+ "requireConfirmation": false
84
46
  },
85
47
  {
86
- "id": "step-execute-rigor-5",
87
- "title": "Execution: Rigor 5 (Maximum)",
48
+ "id": "step-thorough-alternatives",
49
+ "title": "Step 4: Generate Alternative Explanations and Critical Tests",
88
50
  "runCondition": {
89
- "var": "rigor",
90
- "gte": 5
51
+ "or": [
52
+ { "var": "depth", "equals": "THOROUGH" },
53
+ { "var": "rigorMode", "equals": "THOROUGH" },
54
+ { "var": "rigor", "gte": 5 }
55
+ ]
91
56
  },
92
- "prompt": "**EXECUTE RIGOR 5: MAX CHALLENGE**\n\nExecute your challenge plan at Rigor 5 (Maximum Skepticism).\n\n**MISSION:** Try to break it completely.\n\n**EXECUTE:**\n1. Follow `challenge-strategy.md`\n2. Exhaustive assumption enumeration\n3. Extreme edge cases and adversarial inputs\n4. Second-order effects and chaos scenarios\n\n**WORKING NOTES:**\n- Exhaustive Challenges\n- Extreme Scenarios\n- Disproof Attempts",
93
- "agentRole": "You are a relentless adversary trying to prove it wrong.",
94
- "requireConfirmation": false,
95
- "guidance": [
96
- "FOCUS: Breaking the system, extreme edge cases",
97
- "MINDSET: Trust nothing, verify everything"
98
- ]
57
+ "prompt": "For THOROUGH review, go beyond the primary counter-argument into alternatives and discrimination strategy.\n\nProduce:\n- the 1-2 strongest alternative explanations or competing hypotheses\n- why each might beat the current claim\n- the critical tests, observations, or traces that would discriminate between them\n- what result would cause you to keep, revise, or reject the current claim\n\nThis step exists to make THOROUGH meaningfully deeper than STANDARD, not just wordier.",
58
+ "agentRole": "You are building the shortest path to proving which explanation survives.",
59
+ "requireConfirmation": false
99
60
  },
100
61
  {
101
- "id": "step-synthesize",
102
- "title": "Step 5: Synthesize Verdicts",
103
- "prompt": "**SYNTHESIZE VERDICTS**\n\nSynthesize all challenges into final verdicts.\n\n**MISSION:** Deliver clear judgments on each hypothesis.\n\n**EXECUTE:**\n1. Review all Working Notes\n2. Assign Verdicts (Keep/Revise/Reject)\n3. Prioritize Alternatives\n4. Define Critical Tests\n\n**DELIVERABLE:**\nCreate `{deliverableName}`:\n- Executive Summary\n- Hypothesis Analysis & Verdicts\n- Critical Tests Needed\n- Recommendations",
104
- "agentRole": "You are a judge delivering the final verdict.",
105
- "requireConfirmation": false,
106
- "guidance": [
107
- "VERDICTS: Be decisive based on evidence",
108
- "ALTERNATIVES: Propose concrete, better explanations",
109
- "ACTION: What specifically needs to be tested?"
110
- ]
62
+ "id": "step-deliver",
63
+ "title": "Step 5: Deliver the Challenge Verdict",
64
+ "prompt": "Create `{deliverableName}`.\n\nRequired structure:\n- Target Claim\n- Strongest Counter-Argument\n- Weak Assumptions / Evidence Gaps\n- Likely Failure Modes\n- Alternative Explanations (if explored)\n- Critical Tests\n- Verdict: Keep / Revise / Reject\n- Next Action\n\nOptimize for a compact artifact that a main workflow can interrogate and act on immediately. Prefer decisive arguments over exhaustive ceremony.",
65
+ "agentRole": "You are delivering a decisive challenge artifact for the main workflow or caller to synthesize.",
66
+ "requireConfirmation": false
111
67
  }
112
68
  ]
113
- }
69
+ }