agentxchain 2.101.0 → 2.103.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -113,6 +113,8 @@ import { intakeScanCommand } from '../src/commands/intake-scan.js';
113
113
  import { intakeResolveCommand } from '../src/commands/intake-resolve.js';
114
114
  import { intakeStatusCommand } from '../src/commands/intake-status.js';
115
115
  import { demoCommand } from '../src/commands/demo.js';
116
+ import { benchmarkCommand } from '../src/commands/benchmark.js';
117
+ import { benchmarkWorkloadsCommand } from '../src/commands/benchmark-workloads.js';
116
118
  import { historyCommand } from '../src/commands/history.js';
117
119
  import { decisionsCommand } from '../src/commands/decisions.js';
118
120
  import { diffCommand } from '../src/commands/diff.js';
@@ -290,6 +292,25 @@ program
290
292
  .option('-v, --verbose', 'Show stack traces on failure')
291
293
  .action(demoCommand);
292
294
 
295
// `benchmark` command: runs a benchmark workload via benchmarkCommand.
// The command handle is kept so the `workloads` subcommand can be attached
// to it and so its parsed options can be read back later.
const benchmarkCmd = program.command('benchmark');

benchmarkCmd
  .description('Run a governed delivery compliance proof (no API keys required)')
  .option('-j, --json', 'Output as structured JSON')
  .option('--workload <name>', 'Run a named workload: baseline, stress, completion-recovery, or phase-drift')
  .option('--stress', 'Run the adversarial retry workload instead of the baseline happy path')
  .option('--output <dir>', 'Persist benchmark proof artifacts to a directory')
  .action(benchmarkCommand);

// `benchmark workloads` subcommand: lists the workloads that can be run.
benchmarkCmd
  .command('workloads')
  .description('List available benchmark workloads')
  .option('-j, --json', 'Output as JSON')
  .action((subOpts) => {
    // Commander passes --json to the parent command rather than to the
    // subcommand, so accept the flag from either set of parsed options.
    const { json: parentJson } = benchmarkCmd.opts();
    const json = subOpts.json || parentJson;
    benchmarkWorkloadsCommand({ ...subOpts, json });
  });
313
+
293
314
  const scheduleCmd = program
294
315
  .command('schedule')
295
316
  .description('Run governed schedules for repo-local lights-out execution');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentxchain",
3
- "version": "2.101.0",
3
+ "version": "2.103.0",
4
4
  "description": "CLI for AgentXchain — governed multi-agent software delivery",
5
5
  "type": "module",
6
6
  "bin": {
@@ -0,0 +1,206 @@
1
/** Phase sequence shared by every workload that does not declare its own. */
const DEFAULT_PHASE_ORDER = Object.freeze(['planning', 'implementation', 'qa']);

/**
 * Definition of the extra `design` phase used by the `phase-drift` workload:
 * the role and runtime that own the phase, the gate that closes it, and the
 * files, objections and commit messages for its turn.
 * NOTE(review): how each field is consumed is decided by the benchmark runner,
 * which is not part of this module.
 */
const DESIGN_PHASE_SPEC = Object.freeze({
  id: 'design',
  handler: 'generic',
  role: Object.freeze({
    id: 'architect',
    title: 'Architect',
    mandate: 'Design systems and validate architecture.',
    write_authority: 'authoritative',
  }),
  runtime: Object.freeze({
    id: 'manual-architect',
    type: 'manual',
  }),
  prompt: '# Architect Prompt\nBenchmark Architect.',
  allowed_next_roles: Object.freeze(['architect', 'pm', 'human']),
  gate: Object.freeze({
    id: 'design_signoff',
    requires_files: Object.freeze(['.planning/DESIGN_SIGNOFF.md']),
    requires_human_approval: true,
  }),
  execution: Object.freeze({
    files_changed: Object.freeze(['.planning/DESIGN_SIGNOFF.md']),
    files_to_write: Object.freeze([
      Object.freeze({
        path: '.planning/DESIGN_SIGNOFF.md',
        content: '# Design Sign-Off\n\nApproved: YES\n',
      }),
    ]),
    artifact_type: 'commit',
    proposed_next_role: 'human',
    decision_num: 10,
    objections: Object.freeze([
      Object.freeze({
        id: 'OBJ-010',
        severity: 'medium',
        statement: 'Benchmark architecture review: validate system design.',
        status: 'raised',
      }),
    ]),
    commit_message: 'benchmark: architect design',
    accept_commit_message: 'benchmark: accept architect',
    gate_commit_message: 'benchmark: design gate',
  }),
});

/**
 * Catalogue of benchmark workloads, keyed by workload id.
 *
 * Each entry carries display metadata (`id`, `label`, `description`), the
 * `phase_order` it runs, the expectations it declares
 * (`rejected_turn_expected`, `gate_failure_expected`, `recovery_branch`) and
 * per-phase settings under `implementation` and `qa`. `phase-drift`
 * additionally registers its extra phase under `custom_phases`.
 *
 * The catalogue is frozen at every level, including the
 * `qa.missing_completion_files` arrays, so no consumer can alter this shared
 * module state.
 */
export const BENCHMARK_WORKLOADS = Object.freeze({
  baseline: Object.freeze({
    id: 'baseline',
    label: 'Baseline',
    description: 'One accepted turn per phase with no recovery branches.',
    phase_order: DEFAULT_PHASE_ORDER,
    rejected_turn_expected: false,
    gate_failure_expected: false,
    recovery_branch: 'none',
    implementation: Object.freeze({
      reject_invalid_first_attempt: false,
    }),
    qa: Object.freeze({
      fail_completion_once: false,
      missing_completion_files: Object.freeze([]),
      recovery_role: 'qa',
    }),
  }),
  stress: Object.freeze({
    id: 'stress',
    label: 'Stress',
    description: 'Reject the first implementation attempt, then recover and complete the run.',
    phase_order: DEFAULT_PHASE_ORDER,
    rejected_turn_expected: true,
    gate_failure_expected: false,
    recovery_branch: 'implementation_rejection',
    implementation: Object.freeze({
      reject_invalid_first_attempt: true,
    }),
    qa: Object.freeze({
      fail_completion_once: false,
      missing_completion_files: Object.freeze([]),
      recovery_role: 'qa',
    }),
  }),
  'completion-recovery': Object.freeze({
    id: 'completion-recovery',
    label: 'Completion Recovery',
    description: 'Fail the first QA completion gate on a missing required artifact, then repair and complete.',
    phase_order: DEFAULT_PHASE_ORDER,
    rejected_turn_expected: false,
    gate_failure_expected: true,
    recovery_branch: 'run_completion_gate_failure',
    implementation: Object.freeze({
      reject_invalid_first_attempt: false,
    }),
    qa: Object.freeze({
      fail_completion_once: true,
      missing_completion_files: Object.freeze(['.planning/ship-verdict.md']),
      recovery_role: 'qa',
    }),
  }),
  'phase-drift': Object.freeze({
    id: 'phase-drift',
    label: 'Phase Drift',
    description: 'Run with a 4-phase workflow (planning → design → implementation → qa) to produce a different workflow_phase_order. Diffing against baseline proves REG-PHASE-ORDER detection.',
    phase_order: Object.freeze(['planning', 'design', 'implementation', 'qa']),
    rejected_turn_expected: false,
    gate_failure_expected: false,
    recovery_branch: 'none',
    custom_phases: Object.freeze({
      design: DESIGN_PHASE_SPEC,
    }),
    implementation: Object.freeze({
      reject_invalid_first_attempt: false,
    }),
    qa: Object.freeze({
      fail_completion_once: false,
      missing_completion_files: Object.freeze([]),
      recovery_role: 'qa',
    }),
  }),
});
121
+
122
/**
 * List the ids of every registered benchmark workload.
 *
 * @returns {string[]} A new array of the keys of BENCHMARK_WORKLOADS, in
 *   declaration order.
 */
export function listBenchmarkWorkloadIds() {
  const workloadIds = Object.keys(BENCHMARK_WORKLOADS);
  return workloadIds;
}
125
+
126
/**
 * Canonicalize a user-supplied workload name.
 *
 * Surrounding whitespace is removed, the name is lowercased, and underscores
 * become hyphens, so `Phase_Drift` and `phase-drift` resolve to the same id.
 *
 * @param {unknown} value - Raw workload name.
 * @returns {string|null} The canonical id, or null when `value` is not a
 *   string or is empty after trimming.
 */
function normalizeBenchmarkWorkloadId(value) {
  const candidate = typeof value === 'string' ? value.trim() : '';
  if (candidate === '') {
    return null;
  }
  return candidate.toLowerCase().split('_').join('-');
}
136
+
137
/**
 * Print the catalogue of benchmark workloads.
 *
 * With `opts.json` truthy, writes a `{ workloads: [...] }` JSON document to
 * stdout. Otherwise logs one line per workload, giving its id, description
 * and a bracketed list of traits, followed by a usage hint.
 *
 * @param {object} [opts] - Parsed CLI options.
 * @param {boolean} [opts.json] - Emit JSON instead of the text listing.
 * @returns {void}
 */
export function benchmarkWorkloadsCommand(opts = {}) {
  const ids = listBenchmarkWorkloadIds();
  const jsonMode = opts.json || false;

  // JSON-facing summary of one workload; phase_order is copied, not shared.
  const toSummary = (workload) => {
    const hasPhases = Array.isArray(workload.phase_order);
    return {
      id: workload.id,
      label: workload.label,
      description: workload.description,
      phase_order: hasPhases ? [...workload.phase_order] : [],
      phase_count: hasPhases ? workload.phase_order.length : null,
      rejected_turn_expected: workload.rejected_turn_expected,
      gate_failure_expected: workload.gate_failure_expected,
      recovery_branch: workload.recovery_branch,
    };
  };

  // Bracketed trait suffix for the text listing; empty when nothing applies.
  const toFlagSuffix = (workload) => {
    const traits = [];
    if (Array.isArray(workload.phase_order)) {
      traits.push(`phases: ${workload.phase_order.join(' -> ')}`);
    }
    if (workload.rejected_turn_expected) {
      traits.push('rejected-turn');
    }
    if (workload.gate_failure_expected) {
      traits.push('gate-failure');
    }
    if (workload.recovery_branch !== 'none') {
      traits.push(`recovery: ${workload.recovery_branch}`);
    }
    return traits.length > 0 ? ` [${traits.join(', ')}]` : '';
  };

  if (jsonMode) {
    const workloads = ids.map((id) => toSummary(BENCHMARK_WORKLOADS[id]));
    process.stdout.write(JSON.stringify({ workloads }, null, 2) + '\n');
    return;
  }

  console.log('');
  console.log(' Available benchmark workloads:');
  console.log('');
  ids.forEach((id) => {
    const workload = BENCHMARK_WORKLOADS[id];
    console.log(` ${id.padEnd(22)} ${workload.description}${toFlagSuffix(workload)}`);
  });
  console.log('');
  console.log(' Usage: agentxchain benchmark --workload <name>');
  console.log('');
}
177
+
178
/**
 * Decide which benchmark workload a set of CLI options selects.
 *
 * Precedence: an explicit `--workload` name (normalized by
 * normalizeBenchmarkWorkloadId) wins, then `--stress`, then the `baseline`
 * default. Combining `--stress` with a `--workload` other than `stress` is
 * reported as a conflict rather than silently picking one.
 *
 * @param {object} [opts] - Parsed CLI options.
 * @param {string} [opts.workload] - Requested workload name.
 * @param {boolean} [opts.stress] - Shorthand for the `stress` workload.
 * @returns {{ok: true, workload: object, selected_via: string}
 *   | {ok: false, error: string, valid_workloads: string[]}}
 *   On success, the workload definition and which option selected it
 *   (`'--workload'`, `'--stress'` or `'default'`). On failure, a message and
 *   the list of valid workload ids.
 */
export function resolveBenchmarkWorkload(opts = {}) {
  const requestedWorkload = normalizeBenchmarkWorkloadId(opts.workload);
  const stressRequested = Boolean(opts.stress);

  if (stressRequested && requestedWorkload && requestedWorkload !== 'stress') {
    return {
      ok: false,
      error: `Conflicting benchmark workload options: --stress implies "stress" but --workload requested "${requestedWorkload}".`,
      valid_workloads: listBenchmarkWorkloadIds(),
    };
  }

  const workloadId = requestedWorkload || (stressRequested ? 'stress' : 'baseline');

  // Only own keys count as workloads. A plain lookup would also find inherited
  // members, so `--workload constructor` would resolve to
  // Object.prototype.constructor and be reported as a valid workload.
  const isRegistered = Object.prototype.hasOwnProperty.call(BENCHMARK_WORKLOADS, workloadId);
  const workload = isRegistered ? BENCHMARK_WORKLOADS[workloadId] : undefined;

  if (!workload) {
    return {
      ok: false,
      error: `Unknown benchmark workload "${workloadId}". Expected one of: ${listBenchmarkWorkloadIds().join(', ')}.`,
      valid_workloads: listBenchmarkWorkloadIds(),
    };
  }

  return {
    ok: true,
    workload,
    selected_via: requestedWorkload ? '--workload' : (stressRequested ? '--stress' : 'default'),
  };
}