@adcp/client 4.22.1 → 4.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. package/README.md +23 -9
  2. package/bin/adcp.js +83 -18
  3. package/dist/lib/index.d.ts +3 -5
  4. package/dist/lib/index.d.ts.map +1 -1
  5. package/dist/lib/index.js +16 -12
  6. package/dist/lib/index.js.map +1 -1
  7. package/dist/lib/server/index.d.ts +5 -1
  8. package/dist/lib/server/index.d.ts.map +1 -1
  9. package/dist/lib/server/index.js +10 -1
  10. package/dist/lib/server/index.js.map +1 -1
  11. package/dist/lib/server/postgres-task-store.d.ts +105 -0
  12. package/dist/lib/server/postgres-task-store.d.ts.map +1 -0
  13. package/dist/lib/server/postgres-task-store.js +267 -0
  14. package/dist/lib/server/postgres-task-store.js.map +1 -0
  15. package/dist/lib/server/responses.d.ts +1 -0
  16. package/dist/lib/server/responses.d.ts.map +1 -1
  17. package/dist/lib/server/responses.js +1 -0
  18. package/dist/lib/server/responses.js.map +1 -1
  19. package/dist/lib/server/test-controller.d.ts +88 -0
  20. package/dist/lib/server/test-controller.d.ts.map +1 -0
  21. package/dist/lib/server/test-controller.js +227 -0
  22. package/dist/lib/server/test-controller.js.map +1 -0
  23. package/dist/lib/testing/agent-tester.d.ts +1 -1
  24. package/dist/lib/testing/agent-tester.d.ts.map +1 -1
  25. package/dist/lib/testing/agent-tester.js +13 -1
  26. package/dist/lib/testing/agent-tester.js.map +1 -1
  27. package/dist/lib/testing/compliance/comply.d.ts +24 -5
  28. package/dist/lib/testing/compliance/comply.d.ts.map +1 -1
  29. package/dist/lib/testing/compliance/comply.js +318 -277
  30. package/dist/lib/testing/compliance/comply.js.map +1 -1
  31. package/dist/lib/testing/compliance/index.d.ts +2 -1
  32. package/dist/lib/testing/compliance/index.d.ts.map +1 -1
  33. package/dist/lib/testing/compliance/index.js +6 -1
  34. package/dist/lib/testing/compliance/index.js.map +1 -1
  35. package/dist/lib/testing/compliance/platform-storyboards.d.ts +44 -0
  36. package/dist/lib/testing/compliance/platform-storyboards.d.ts.map +1 -0
  37. package/dist/lib/testing/compliance/platform-storyboards.js +232 -0
  38. package/dist/lib/testing/compliance/platform-storyboards.js.map +1 -0
  39. package/dist/lib/testing/compliance/storyboard-tracks.d.ts +2 -9
  40. package/dist/lib/testing/compliance/storyboard-tracks.d.ts.map +1 -1
  41. package/dist/lib/testing/compliance/storyboard-tracks.js +15 -46
  42. package/dist/lib/testing/compliance/storyboard-tracks.js.map +1 -1
  43. package/dist/lib/testing/compliance/types.d.ts +22 -1
  44. package/dist/lib/testing/compliance/types.d.ts.map +1 -1
  45. package/dist/lib/testing/index.d.ts +1 -1
  46. package/dist/lib/testing/index.d.ts.map +1 -1
  47. package/dist/lib/testing/index.js +6 -1
  48. package/dist/lib/testing/index.js.map +1 -1
  49. package/dist/lib/testing/orchestrator.d.ts.map +1 -1
  50. package/dist/lib/testing/orchestrator.js +5 -1
  51. package/dist/lib/testing/orchestrator.js.map +1 -1
  52. package/dist/lib/testing/scenarios/brand-rights.d.ts +19 -1
  53. package/dist/lib/testing/scenarios/brand-rights.d.ts.map +1 -1
  54. package/dist/lib/testing/scenarios/brand-rights.js +138 -1
  55. package/dist/lib/testing/scenarios/brand-rights.js.map +1 -1
  56. package/dist/lib/testing/scenarios/deterministic.js +7 -7
  57. package/dist/lib/testing/scenarios/deterministic.js.map +1 -1
  58. package/dist/lib/testing/scenarios/index.d.ts +1 -1
  59. package/dist/lib/testing/scenarios/index.d.ts.map +1 -1
  60. package/dist/lib/testing/scenarios/index.js +4 -2
  61. package/dist/lib/testing/scenarios/index.js.map +1 -1
  62. package/dist/lib/testing/scenarios/media-buy.js +4 -4
  63. package/dist/lib/testing/scenarios/media-buy.js.map +1 -1
  64. package/dist/lib/testing/storyboard/loader.d.ts +1 -0
  65. package/dist/lib/testing/storyboard/loader.d.ts.map +1 -1
  66. package/dist/lib/testing/storyboard/loader.js +14 -0
  67. package/dist/lib/testing/storyboard/loader.js.map +1 -1
  68. package/dist/lib/testing/storyboard/request-builder.d.ts.map +1 -1
  69. package/dist/lib/testing/storyboard/request-builder.js +88 -11
  70. package/dist/lib/testing/storyboard/request-builder.js.map +1 -1
  71. package/dist/lib/testing/storyboard/runner.d.ts.map +1 -1
  72. package/dist/lib/testing/storyboard/runner.js +83 -5
  73. package/dist/lib/testing/storyboard/runner.js.map +1 -1
  74. package/dist/lib/testing/storyboard/task-map.d.ts +2 -0
  75. package/dist/lib/testing/storyboard/task-map.d.ts.map +1 -1
  76. package/dist/lib/testing/storyboard/task-map.js +23 -9
  77. package/dist/lib/testing/storyboard/task-map.js.map +1 -1
  78. package/dist/lib/testing/storyboard/types.d.ts +6 -2
  79. package/dist/lib/testing/storyboard/types.d.ts.map +1 -1
  80. package/dist/lib/testing/storyboard/validations.d.ts.map +1 -1
  81. package/dist/lib/testing/storyboard/validations.js +21 -4
  82. package/dist/lib/testing/storyboard/validations.js.map +1 -1
  83. package/dist/lib/testing/types.d.ts +1 -1
  84. package/dist/lib/testing/types.d.ts.map +1 -1
  85. package/dist/lib/types/core.generated.d.ts +242 -3
  86. package/dist/lib/types/core.generated.d.ts.map +1 -1
  87. package/dist/lib/types/core.generated.js +1 -1
  88. package/dist/lib/types/schemas.generated.d.ts +3697 -3468
  89. package/dist/lib/types/schemas.generated.d.ts.map +1 -1
  90. package/dist/lib/types/schemas.generated.js +226 -118
  91. package/dist/lib/types/schemas.generated.js.map +1 -1
  92. package/dist/lib/types/tools.generated.d.ts +281 -79
  93. package/dist/lib/types/tools.generated.d.ts.map +1 -1
  94. package/dist/lib/utils/capabilities.d.ts +2 -2
  95. package/dist/lib/utils/capabilities.d.ts.map +1 -1
  96. package/dist/lib/utils/capabilities.js +9 -3
  97. package/dist/lib/utils/capabilities.js.map +1 -1
  98. package/dist/lib/utils/response-schemas.d.ts.map +1 -1
  99. package/dist/lib/utils/response-schemas.js +9 -0
  100. package/dist/lib/utils/response-schemas.js.map +1 -1
  101. package/dist/lib/version.d.ts +3 -3
  102. package/dist/lib/version.js +3 -3
  103. package/docs/llms.txt +56 -32
  104. package/package.json +8 -2
  105. package/skills/adcp/SKILL.md +118 -33
  106. package/skills/build-creative-agent/SKILL.md +221 -0
  107. package/skills/build-generative-seller-agent/SKILL.md +288 -0
  108. package/skills/build-retail-media-agent/SKILL.md +237 -0
  109. package/skills/build-seller-agent/SKILL.md +313 -0
  110. package/skills/build-signals-agent/SKILL.md +203 -0
  111. package/storyboards/audience_sync.yaml +18 -29
  112. package/storyboards/behavioral_analysis.yaml +40 -72
  113. package/storyboards/brand_rights.yaml +172 -75
  114. package/storyboards/campaign_governance_conditions.yaml +187 -0
  115. package/storyboards/campaign_governance_delivery.yaml +231 -0
  116. package/storyboards/campaign_governance_denied.yaml +136 -0
  117. package/storyboards/capability_discovery.yaml +106 -0
  118. package/storyboards/content_standards.yaml +251 -0
  119. package/storyboards/creative_ad_server.yaml +108 -16
  120. package/storyboards/creative_generative.yaml +317 -0
  121. package/storyboards/creative_lifecycle.yaml +284 -0
  122. package/storyboards/creative_sales_agent.yaml +2 -6
  123. package/storyboards/creative_template.yaml +3 -6
  124. package/storyboards/deterministic_testing.yaml +271 -245
  125. package/storyboards/error_compliance.yaml +105 -108
  126. package/storyboards/media_buy_catalog_creative.yaml +8 -5
  127. package/storyboards/media_buy_generative_seller.yaml +581 -0
  128. package/storyboards/media_buy_governance_escalation.yaml +10 -6
  129. package/storyboards/media_buy_guaranteed_approval.yaml +21 -19
  130. package/storyboards/media_buy_non_guaranteed.yaml +9 -8
  131. package/storyboards/media_buy_proposal_mode.yaml +12 -11
  132. package/storyboards/media_buy_seller.yaml +161 -173
  133. package/storyboards/media_buy_state_machine.yaml +102 -101
  134. package/storyboards/property_governance.yaml +239 -0
  135. package/storyboards/schema.yaml +3 -2
  136. package/storyboards/schema_validation.yaml +58 -51
  137. package/storyboards/si_session.yaml +99 -317
  138. package/storyboards/signal_marketplace.yaml +9 -5
  139. package/storyboards/signal_owned.yaml +6 -5
  140. package/storyboards/social_platform.yaml +274 -0
  141. package/storyboards/test-kits/acme-outdoor.yaml +118 -0
  142. package/storyboards/test-kits/nova-motors.yaml +134 -0
  143. package/storyboards/governance_content_standards.yaml +0 -213
  144. package/storyboards/governance_property_lists.yaml +0 -372
@@ -2,8 +2,10 @@
2
2
  /**
3
3
  * Compliance Engine
4
4
  *
5
- * Runs all applicable capability tracks against an agent
6
- * and reports results for every track — never stops at the first failure.
5
+ * Storyboard-driven compliance assessment. Storyboards are the routing
6
+ * mechanism; tracks are a reporting layer derived from storyboard results.
7
+ *
8
+ * Resolution priority: storyboards > platform_type (when tracks is not set) > tracks > all applicable.
7
9
  */
8
10
  var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
9
11
  if (k2 === undefined) k2 = k;
@@ -45,98 +47,15 @@ exports.formatComplianceResults = formatComplianceResults;
45
47
  exports.formatComplianceResultsJSON = formatComplianceResultsJSON;
46
48
  const client_1 = require("../client");
47
49
  const storyboard_tracks_1 = require("./storyboard-tracks");
50
+ const runner_1 = require("../storyboard/runner");
51
+ const loader_1 = require("../storyboard/loader");
52
+ const platform_storyboards_1 = require("./platform-storyboards");
48
53
  const profiles_1 = require("./profiles");
49
54
  const mcp_1 = require("../../protocols/mcp");
50
55
  const test_controller_1 = require("../test-controller");
51
56
  /**
52
- * Maps each track to its constituent scenarios and a human-readable label.
53
- */
54
- const TRACK_DEFINITIONS = {
55
- core: {
56
- label: 'Core Protocol',
57
- scenarios: [
58
- 'health_check',
59
- 'discovery',
60
- 'capability_discovery',
61
- 'schema_compliance',
62
- 'controller_validation',
63
- 'deterministic_account',
64
- ],
65
- },
66
- products: {
67
- label: 'Product Discovery',
68
- scenarios: ['pricing_edge_cases', 'behavior_analysis', 'response_consistency'],
69
- },
70
- media_buy: {
71
- label: 'Media Buy Lifecycle',
72
- scenarios: [
73
- 'create_media_buy',
74
- 'full_sales_flow',
75
- 'creative_inline',
76
- 'temporal_validation',
77
- 'media_buy_lifecycle',
78
- 'terminal_state_enforcement',
79
- 'package_lifecycle',
80
- 'seller_governance_context',
81
- 'deterministic_media_buy',
82
- 'deterministic_budget',
83
- ],
84
- },
85
- creative: {
86
- label: 'Creative Management',
87
- scenarios: ['creative_sync', 'creative_flow', 'deterministic_creative'],
88
- },
89
- reporting: {
90
- label: 'Reporting',
91
- scenarios: ['reporting_flow', 'deterministic_delivery'],
92
- },
93
- governance: {
94
- label: 'Governance',
95
- scenarios: ['governance_property_lists', 'governance_content_standards', 'property_list_filters'],
96
- },
97
- campaign_governance: {
98
- label: 'Campaign Governance',
99
- scenarios: [
100
- 'campaign_governance',
101
- 'campaign_governance_denied',
102
- 'campaign_governance_conditions',
103
- 'campaign_governance_delivery',
104
- ],
105
- },
106
- signals: {
107
- label: 'Signals',
108
- scenarios: ['signals_flow'],
109
- },
110
- si: {
111
- label: 'Sponsored Intelligence',
112
- scenarios: ['si_session_lifecycle', 'si_availability', 'si_handoff', 'deterministic_session'],
113
- },
114
- audiences: {
115
- label: 'Audience Management',
116
- scenarios: ['sync_audiences'],
117
- },
118
- error_handling: {
119
- label: 'Error Compliance',
120
- scenarios: ['error_codes', 'error_structure', 'error_transport'],
121
- },
122
- };
123
- /**
124
- * Which tools make a track "applicable" — if the agent has at least one
125
- * of these tools, the track should be attempted.
57
+ * All compliance tracks in display order.
126
58
  */
127
- const TRACK_RELEVANCE = {
128
- core: [], // always applicable
129
- products: ['get_products'],
130
- media_buy: ['create_media_buy', 'update_media_buy', 'get_media_buys'],
131
- creative: ['sync_creatives', 'build_creative', 'list_creative_formats'],
132
- reporting: ['get_media_buy_delivery'],
133
- governance: ['create_property_list', 'list_content_standards'],
134
- campaign_governance: ['sync_plans', 'check_governance'],
135
- signals: ['get_signals'],
136
- si: ['si_initiate_session'],
137
- audiences: ['sync_audiences'],
138
- error_handling: ['create_media_buy'],
139
- };
140
59
  const TRACK_ORDER = [
141
60
  'core',
142
61
  'products',
@@ -149,52 +68,8 @@ const TRACK_ORDER = [
149
68
  'si',
150
69
  'audiences',
151
70
  'error_handling',
71
+ 'brand',
152
72
  ];
153
- function isTrackApplicable(track, tools) {
154
- const relevantTools = TRACK_RELEVANCE[track];
155
- if (relevantTools.length === 0)
156
- return true;
157
- return relevantTools.some(t => tools.includes(t));
158
- }
159
- function isAuthError(step) {
160
- if (!step.error || step.passed)
161
- return false;
162
- const e = step.error.toLowerCase();
163
- return (e.includes('authentication') ||
164
- e.includes('x-adcp-auth') ||
165
- e.includes('unauthorized') ||
166
- e.includes('missing auth') ||
167
- e.includes('401'));
168
- }
169
- /**
170
- * Check if a scenario failed entirely due to auth errors.
171
- * Returns true if every failed step is an auth error.
172
- */
173
- function isAuthOnlyFailure(result) {
174
- if (result.overall_passed)
175
- return false;
176
- const failedSteps = (result.steps ?? []).filter(s => !s.passed);
177
- return failedSteps.length > 0 && failedSteps.every(isAuthError);
178
- }
179
- function computeTrackStatus(results, skippedCount, hasAuth) {
180
- if (results.length === 0)
181
- return 'skip';
182
- // When running without auth, scenarios that failed only due to auth
183
- // don't count as failures
184
- const effectiveResults = results.map(r => {
185
- if (!hasAuth && isAuthOnlyFailure(r)) {
186
- return { ...r, _authSkipped: true, overall_passed: true };
187
- }
188
- return r;
189
- });
190
- const passed = effectiveResults.filter(r => r.overall_passed).length;
191
- const total = effectiveResults.length;
192
- if (passed === total)
193
- return 'pass';
194
- if (passed === 0)
195
- return 'fail';
196
- return 'partial';
197
- }
198
73
  /**
199
74
  * Collect advisory observations from test results.
200
75
  * Analyzes the actual data for quality signals that aren't pass/fail.
@@ -264,7 +139,6 @@ function collectObservations(track, results, profile) {
264
139
  // Media buy track observations
265
140
  if (track === 'media_buy') {
266
141
  // Check for valid_actions support (first match only)
267
- // Only steps with observation_data are considered — snapshot-only steps don't set it.
268
142
  let checkedValidActions = false;
269
143
  for (const result of results) {
270
144
  if (checkedValidActions)
@@ -281,7 +155,6 @@ function collectObservations(track, results, profile) {
281
155
  'Without valid_actions, buyer agents must hardcode the state machine to know what operations are permitted.',
282
156
  });
283
157
  }
284
- // Check creative_deadline support
285
158
  if (obs.has_creative_deadline === false) {
286
159
  observations.push({
287
160
  category: 'best_practice',
@@ -291,7 +164,6 @@ function collectObservations(track, results, profile) {
291
164
  'Buyers need to know when creative uploads must be finalized to avoid rejected submissions.',
292
165
  });
293
166
  }
294
- // Check history entry shape when present
295
167
  if (obs.history_entries && obs.history_entries > 0 && obs.history_valid === false) {
296
168
  observations.push({
297
169
  category: 'best_practice',
@@ -301,7 +173,6 @@ function collectObservations(track, results, profile) {
301
173
  'History entries must include at least timestamp and action to be useful for audit.',
302
174
  });
303
175
  }
304
- // Check dry_run/sandbox confirmation
305
176
  if (obs.sandbox === undefined || obs.sandbox === null) {
306
177
  observations.push({
307
178
  category: 'best_practice',
@@ -387,6 +258,15 @@ function collectObservations(track, results, profile) {
387
258
  'Buyers need to distinguish buyer-initiated from seller-initiated cancellations.',
388
259
  });
389
260
  }
261
+ if (!obs.canceled_at) {
262
+ observations.push({
263
+ category: 'completeness',
264
+ severity: 'warning',
265
+ track,
266
+ message: 'Agent transitions to canceled status but does not include canceled_at timestamp. ' +
267
+ 'A cancellation timestamp is required for audit and reconciliation.',
268
+ });
269
+ }
390
270
  checkedCancellation = true;
391
271
  }
392
272
  }
@@ -484,7 +364,16 @@ function collectObservations(track, results, profile) {
484
364
  }
485
365
  /**
486
366
  * Run compliance assessment against an agent.
487
- * Assesses all applicable tracks independently — never stops at first failure.
367
+ * Assesses all applicable storyboards and reports results grouped by track.
368
+ *
369
+ * Resolution priority:
370
+ * 1. options.storyboards — run exactly these storyboard IDs
371
+ * 2. options.platform_type (when tracks is not set) — resolve via PLATFORM_STORYBOARDS
372
+ * 3. options.tracks — run all storyboards for these tracks
373
+ * 4. Default — run all applicable storyboards
374
+ *
375
+ * When platform_type is set, it always drives coherence checking regardless
376
+ * of how the storyboard pool was resolved.
488
377
  */
489
378
  async function comply(agentUrl, options = {}) {
490
379
  try {
@@ -494,10 +383,154 @@ async function comply(agentUrl, options = {}) {
494
383
  await (0, mcp_1.closeMCPConnections)();
495
384
  }
496
385
  }
386
+ // ────────────────────────────────────────────────────────────
387
+ // Storyboard resolution
388
+ // ────────────────────────────────────────────────────────────
389
+ /**
390
+ * Resolve the storyboard pool based on options.
391
+ * Priority: storyboards > platform_type (when tracks is not set) > tracks > all bundled.
392
+ */
393
+ function resolveStoryboards(options) {
394
+ // Explicit storyboard IDs — highest priority
395
+ if (options.storyboards?.length) {
396
+ const resolved = [];
397
+ for (const id of options.storyboards) {
398
+ const sb = (0, loader_1.getStoryboardById)(id);
399
+ if (!sb) {
400
+ throw new Error(`Unknown storyboard ID: "${id}". Use listStoryboards() to see available IDs.`);
401
+ }
402
+ resolved.push(sb);
403
+ }
404
+ return resolved;
405
+ }
406
+ // Platform type — resolve via PLATFORM_STORYBOARDS
407
+ if (options.platform_type && !options.tracks) {
408
+ const pt = options.platform_type;
409
+ const ids = platform_storyboards_1.PLATFORM_STORYBOARDS[pt];
410
+ if (ids) {
411
+ const resolved = [];
412
+ for (const id of ids) {
413
+ const sb = (0, loader_1.getStoryboardById)(id);
414
+ if (sb) {
415
+ resolved.push(sb);
416
+ }
417
+ else {
418
+ // Data integrity issue — storyboard declared in PLATFORM_STORYBOARDS
419
+ // but not found in bundled set. This is a packaging bug.
420
+ console.warn(`PLATFORM_STORYBOARDS[${pt}] references unknown storyboard "${id}"`);
421
+ }
422
+ }
423
+ // Also include universal storyboards (those with a track but no platform_types) not already in the set
424
+ const resolvedIds = new Set(resolved.map(s => s.id));
425
+ for (const sb of (0, loader_1.loadBundledStoryboards)()) {
426
+ if (!sb.track)
427
+ continue;
428
+ if (resolvedIds.has(sb.id))
429
+ continue;
430
+ if (!sb.platform_types?.length) {
431
+ resolved.push(sb);
432
+ }
433
+ }
434
+ return resolved;
435
+ }
436
+ }
437
+ // Track filter — run storyboards whose track field matches
438
+ if (options.tracks?.length) {
439
+ const trackSet = new Set(options.tracks);
440
+ return (0, loader_1.loadBundledStoryboards)().filter(sb => sb.track && trackSet.has(sb.track));
441
+ }
442
+ // Default — all compliance storyboards (those with a track field)
443
+ return (0, loader_1.loadBundledStoryboards)().filter(sb => sb.track);
444
+ }
445
+ /**
446
+ * Filter storyboards to those applicable for the agent's tools.
447
+ * A storyboard is applicable if the agent has at least one of its required_tools,
448
+ * or if it has no required_tools at all.
449
+ */
450
+ function filterApplicable(storyboards, agentTools) {
451
+ return storyboards.filter(sb => {
452
+ if (!sb.required_tools?.length)
453
+ return true;
454
+ return sb.required_tools.some(tool => agentTools.includes(tool));
455
+ });
456
+ }
457
+ /**
458
+ * Group storyboard results by track.
459
+ */
460
+ function groupByTrack(results, storyboards) {
461
+ // Build a storyboard ID → track lookup
462
+ const trackLookup = new Map();
463
+ for (const sb of storyboards) {
464
+ if (sb.track) {
465
+ trackLookup.set(sb.id, sb.track);
466
+ }
467
+ }
468
+ const grouped = new Map();
469
+ for (const result of results) {
470
+ const track = trackLookup.get(result.storyboard_id);
471
+ if (!track)
472
+ continue;
473
+ if (!grouped.has(track))
474
+ grouped.set(track, []);
475
+ grouped.get(track).push(result);
476
+ }
477
+ return grouped;
478
+ }
479
+ // ────────────────────────────────────────────────────────────
480
+ // Failure extraction
481
+ // ────────────────────────────────────────────────────────────
482
+ /**
483
+ * Extract a flat list of failures from raw storyboard results.
484
+ * Preserves step_id and expected text from the storyboard YAML,
485
+ * and includes a fix_command for targeted re-running.
486
+ */
487
+ function extractFailures(results, storyboards, agentRef) {
488
+ const failures = [];
489
+ // Build storyboard lookup for track and expected text
490
+ const sbLookup = new Map();
491
+ for (const sb of storyboards) {
492
+ sbLookup.set(sb.id, sb);
493
+ }
494
+ for (const result of results) {
495
+ const sb = sbLookup.get(result.storyboard_id);
496
+ const track = sb?.track ?? 'core';
497
+ for (const phase of result.phases) {
498
+ for (const step of phase.steps) {
499
+ if (step.passed || step.skipped)
500
+ continue;
501
+ // Find the step definition in the storyboard for expected text
502
+ let expected;
503
+ if (sb) {
504
+ for (const p of sb.phases) {
505
+ const stepDef = p.steps.find(s => s.id === step.step_id);
506
+ if (stepDef?.expected) {
507
+ expected = stepDef.expected.trim();
508
+ break;
509
+ }
510
+ }
511
+ }
512
+ failures.push({
513
+ track,
514
+ storyboard_id: result.storyboard_id,
515
+ step_id: step.step_id,
516
+ step_title: step.title,
517
+ task: step.task,
518
+ error: step.error,
519
+ expected,
520
+ fix_command: `adcp storyboard step ${agentRef} ${result.storyboard_id} ${step.step_id} --json`,
521
+ });
522
+ }
523
+ }
524
+ }
525
+ return failures;
526
+ }
527
+ // ────────────────────────────────────────────────────────────
528
+ // Core implementation
529
+ // ────────────────────────────────────────────────────────────
497
530
  async function complyImpl(agentUrl, options) {
498
531
  const start = Date.now();
499
- const { tracks: trackFilter, platform_type, timeout_ms, signal: externalSignal, ...testOptions } = options;
500
- // Validate platform_type if provided (issue #402: accept string, validate internally)
532
+ const { storyboards: _storyboardIds, tracks: _trackFilter, platform_type, timeout_ms, signal: externalSignal, ...testOptions } = options;
533
+ // Validate platform_type if provided
501
534
  let platformProfile;
502
535
  if (platform_type) {
503
536
  const validTypes = (0, profiles_1.getAllPlatformTypes)();
@@ -537,14 +570,14 @@ async function complyImpl(agentUrl, options) {
537
570
  };
538
571
  // Check for abort before starting
539
572
  signal?.throwIfAborted();
540
- // Collect observations across all tracks (declared early for tool discovery diagnostics)
573
+ // Collect observations across all tracks
541
574
  const allObservations = [];
542
- // Discover agent capabilities once and share across all scenarios
575
+ // Discover agent capabilities once and share across all storyboards
543
576
  const client = (0, client_1.createTestClient)(agentUrl, effectiveOptions.protocol ?? 'mcp', effectiveOptions);
544
577
  const { profile, step: profileStep } = await (0, client_1.discoverAgentProfile)(client);
545
578
  effectiveOptions._client = client;
546
579
  effectiveOptions._profile = profile;
547
- // Log discovered tools for diagnostic purposes
580
+ // Log discovered tools
548
581
  if (profileStep.passed) {
549
582
  allObservations.push({
550
583
  category: 'tool_discovery',
@@ -562,145 +595,62 @@ async function complyImpl(agentUrl, options) {
562
595
  }
563
596
  }
564
597
  if (!profileStep.passed) {
565
- const errorMsg = profileStep.error || 'Unknown error';
566
- const observations = [];
567
- // Check for auth errors — either explicit 401/Unauthorized or MCP SDK's generic
568
- // "Failed to discover" which often wraps a 401
569
- const isExplicitAuthError = errorMsg.includes('401') ||
570
- errorMsg.includes('Unauthorized') ||
571
- errorMsg.includes('unauthorized') ||
572
- errorMsg.includes('authentication') ||
573
- errorMsg.includes('JWS') ||
574
- errorMsg.includes('JWT') ||
575
- errorMsg.includes('signature verification');
576
- // When MCP SDK wraps the error, probe the endpoint directly
577
- let isAuthError = isExplicitAuthError;
578
- if (!isAuthError && errorMsg.includes('Failed to discover')) {
579
- try {
580
- const probe = await fetch(agentUrl, {
581
- method: 'POST',
582
- headers: { 'Content-Type': 'application/json' },
583
- signal,
584
- });
585
- if (probe.status === 401 || probe.status === 403) {
586
- isAuthError = true;
587
- }
588
- }
589
- catch {
590
- // Network error — not an auth issue
591
- }
592
- }
593
- const headline = isAuthError ? `Authentication required` : `Agent unreachable — ${errorMsg}`;
594
- if (isAuthError) {
595
- // Check if agent supports OAuth
596
- const { discoverOAuthMetadata } = await Promise.resolve().then(() => __importStar(require('../../auth/oauth/discovery')));
597
- const oauthMeta = await discoverOAuthMetadata(agentUrl);
598
- if (oauthMeta) {
599
- observations.push({
600
- category: 'auth',
601
- severity: 'error',
602
- message: `Agent requires OAuth (issuer: ${oauthMeta.issuer || 'unknown'}). Save credentials: adcp --save-auth <alias> ${agentUrl} --oauth`,
603
- });
604
- }
605
- else {
606
- observations.push({
607
- category: 'auth',
608
- severity: 'error',
609
- message: 'Agent returned 401. Check your --auth token.',
610
- });
611
- }
612
- }
613
- return {
614
- agent_url: agentUrl,
615
- agent_profile: profile,
616
- overall_status: (isAuthError ? 'auth_required' : 'unreachable'),
617
- tracks: [],
618
- tested_tracks: [],
619
- skipped_tracks: [],
620
- expected_tracks: [],
621
- summary: {
622
- tracks_passed: 0,
623
- tracks_failed: 0,
624
- tracks_skipped: 0,
625
- tracks_partial: 0,
626
- tracks_expected: 0,
627
- headline,
628
- },
629
- observations,
630
- tested_at: new Date().toISOString(),
631
- total_duration_ms: Date.now() - start,
632
- dry_run: effectiveOptions.dry_run !== false,
633
- };
598
+ return buildUnreachableResult(agentUrl, profile, profileStep.error, start, effectiveOptions, signal);
634
599
  }
635
- const tracksToRun = trackFilter ?? TRACK_ORDER;
636
- const trackResults = [];
637
- for (const track of tracksToRun) {
638
- // Check for abort between tracks
600
+ // Resolve and filter storyboard pool
601
+ const allStoryboards = resolveStoryboards(options);
602
+ const applicableStoryboards = filterApplicable(allStoryboards, profile.tools);
603
+ // Run storyboards
604
+ const storyboardResults = [];
605
+ const runOptions = {
606
+ ...effectiveOptions,
607
+ agentTools: profile.tools,
608
+ };
609
+ for (const sb of applicableStoryboards) {
639
610
  signal?.throwIfAborted();
640
- const def = TRACK_DEFINITIONS[track];
641
- if (!def)
611
+ const result = await (0, runner_1.runStoryboard)(agentUrl, sb, runOptions);
612
+ storyboardResults.push(result);
613
+ }
614
+ // Group results by track and build TrackResults
615
+ const grouped = groupByTrack(storyboardResults, applicableStoryboards);
616
+ const trackResults = [];
617
+ // Determine which tracks had storyboards in the pool (even if filtered out by tools)
618
+ const poolTrackSet = new Set();
619
+ for (const sb of allStoryboards) {
620
+ if (sb.track)
621
+ poolTrackSet.add(sb.track);
622
+ }
623
+ for (const track of TRACK_ORDER) {
624
+ if (!poolTrackSet.has(track))
642
625
  continue;
643
- if (!isTrackApplicable(track, profile.tools)) {
626
+ const results = grouped.get(track) ?? [];
627
+ if (results.length > 0) {
628
+ const trackResult = (0, storyboard_tracks_1.mapStoryboardResultsToTrackResult)(track, results, profile);
629
+ const observations = collectObservations(track, trackResult.scenarios, profile);
630
+ trackResult.observations = observations;
631
+ allObservations.push(...observations);
632
+ trackResults.push(trackResult);
633
+ }
634
+ else {
635
+ // Track was in the pool but no storyboards ran (agent lacks tools)
644
636
  const isExpected = track !== 'core' && (platformProfile?.expected_tracks.includes(track) ?? false);
645
- const requiredTools = TRACK_RELEVANCE[track];
646
- const trackObservations = [];
647
- if (requiredTools.length > 0) {
648
- trackObservations.push({
649
- category: 'tool_discovery',
650
- severity: isExpected ? 'warning' : 'info',
651
- message: `Track "${track}" skipped: agent does not advertise any of [${requiredTools.join(', ')}]. ` +
652
- `Agent tools: [${profile.tools.join(', ')}]`,
653
- evidence: { expected_tools: requiredTools, agent_tool_count: profile.tools.length },
654
- });
655
- }
656
- allObservations.push(...trackObservations);
657
637
  trackResults.push({
658
638
  track,
659
639
  status: isExpected ? 'expected' : 'skip',
660
- label: def.label,
661
- scenarios: [],
662
- skipped_scenarios: def.scenarios,
663
- observations: trackObservations,
664
- duration_ms: 0,
665
- });
666
- continue;
667
- }
668
- const trackStart = Date.now();
669
- // Run compliance storyboards for this track
670
- const storyboardResults = await (0, storyboard_tracks_1.runTrackStoryboards)(agentUrl, track, profile.tools, {
671
- ...effectiveOptions,
672
- agentTools: profile.tools,
673
- });
674
- let trackResult;
675
- if (storyboardResults.length > 0) {
676
- // Map storyboard results to TrackResult for backwards compat
677
- trackResult = (0, storyboard_tracks_1.mapStoryboardResultsToTrackResult)(track, storyboardResults, profile);
678
- }
679
- else {
680
- // No storyboards for this track — skip
681
- trackResult = {
682
- track,
683
- status: 'skip',
684
- label: def.label,
640
+ label: storyboard_tracks_1.TRACK_LABELS[track] || track,
685
641
  scenarios: [],
686
642
  skipped_scenarios: [],
687
643
  observations: [],
688
644
  duration_ms: 0,
689
- };
645
+ });
690
646
  }
691
- // Collect observations from track results and agent profile
692
- const observations = collectObservations(track, trackResult.scenarios, profile);
693
- trackResult.observations = observations;
694
- trackResult.duration_ms = Date.now() - trackStart;
695
- allObservations.push(...observations);
696
- trackResults.push(trackResult);
697
647
  }
698
648
  // Build platform coherence result if platform type was declared
699
649
  let platformCoherence;
700
650
  if (platformProfile) {
701
651
  const findings = platformProfile.checkCoherence(profile);
702
- const missingTracks = platformProfile.expected_tracks.filter(t => !isTrackApplicable(t, profile.tools) && t !== 'core');
703
- // Add coherence findings as observations
652
+ const applicableTrackSet = new Set(trackResults.filter(t => t.status !== 'skip' && t.status !== 'expected').map(t => t.track));
653
+ const missingTracks = platformProfile.expected_tracks.filter(t => !applicableTrackSet.has(t) && t !== 'core');
704
654
  for (const finding of findings) {
705
655
  allObservations.push({
706
656
  category: 'coherence',
@@ -720,18 +670,14 @@ async function complyImpl(agentUrl, options) {
720
670
  };
721
671
  }
722
672
  const summary = buildSummary(trackResults);
723
- // Partition tracks by disposition (issue #403)
724
673
  const testedTracks = trackResults.filter(t => t.status === 'pass' || t.status === 'fail' || t.status === 'partial');
725
674
  const skippedTracks = trackResults
726
675
  .filter(t => t.status === 'skip')
727
- .map(t => {
728
- const required = TRACK_RELEVANCE[t.track];
729
- return {
730
- track: t.track,
731
- label: t.label,
732
- reason: required.length > 0 ? `Agent lacks required tools: ${required.join(', ')}` : 'Agent lacks required tools',
733
- };
734
- });
676
+ .map(t => ({
677
+ track: t.track,
678
+ label: t.label,
679
+ reason: 'Agent lacks required tools for applicable storyboards',
680
+ }));
735
681
  const expectedTracks = trackResults
736
682
  .filter(t => t.status === 'expected')
737
683
  .map(t => ({
@@ -739,8 +685,10 @@ async function complyImpl(agentUrl, options) {
739
685
  label: t.label,
740
686
  reason: `Expected for ${platformCoherence?.label ?? 'declared platform type'}`,
741
687
  }));
742
- // Compute overall status (issue #401)
743
688
  const overallStatus = computeOverallStatus(summary);
689
+ // Build flat failures array from raw storyboard results (preserves step_id and expected)
690
+ const agentRef = options.agent_alias || agentUrl;
691
+ const failures = extractFailures(storyboardResults, applicableStoryboards, agentRef);
744
692
  return {
745
693
  agent_url: agentUrl,
746
694
  agent_profile: profile,
@@ -751,7 +699,9 @@ async function complyImpl(agentUrl, options) {
751
699
  expected_tracks: expectedTracks,
752
700
  summary,
753
701
  observations: allObservations,
702
+ failures: failures.length > 0 ? failures : undefined,
754
703
  platform_coherence: platformCoherence,
704
+ storyboards_executed: applicableStoryboards.map(sb => sb.id),
755
705
  controller_detected: controllerDetection.detected,
756
706
  controller_scenarios: controllerDetection.detected ? controllerDetection.scenarios : undefined,
757
707
  tested_at: new Date().toISOString(),
@@ -767,6 +717,77 @@ async function complyImpl(agentUrl, options) {
767
717
  }
768
718
  }
769
719
  }
720
+ /**
721
+ * Build result for an unreachable or auth-required agent.
722
+ */
723
+ async function buildUnreachableResult(agentUrl, profile, errorMsg, start, effectiveOptions, signal) {
724
+ const err = errorMsg || 'Unknown error';
725
+ const observations = [];
726
+ const isExplicitAuthError = err.includes('401') ||
727
+ err.includes('Unauthorized') ||
728
+ err.includes('unauthorized') ||
729
+ err.includes('authentication') ||
730
+ err.includes('JWS') ||
731
+ err.includes('JWT') ||
732
+ err.includes('signature verification');
733
+ let isAuthError = isExplicitAuthError;
734
+ if (!isAuthError && err.includes('Failed to discover')) {
735
+ try {
736
+ const probe = await fetch(agentUrl, {
737
+ method: 'POST',
738
+ headers: { 'Content-Type': 'application/json' },
739
+ signal,
740
+ });
741
+ if (probe.status === 401 || probe.status === 403) {
742
+ isAuthError = true;
743
+ }
744
+ }
745
+ catch {
746
+ // Network error — not an auth issue
747
+ }
748
+ }
749
+ const headline = isAuthError ? `Authentication required` : `Agent unreachable — ${err}`;
750
+ if (isAuthError) {
751
+ const { discoverOAuthMetadata } = await Promise.resolve().then(() => __importStar(require('../../auth/oauth/discovery')));
752
+ const oauthMeta = await discoverOAuthMetadata(agentUrl);
753
+ if (oauthMeta) {
754
+ observations.push({
755
+ category: 'auth',
756
+ severity: 'error',
757
+ message: `Agent requires OAuth (issuer: ${oauthMeta.issuer || 'unknown'}). Save credentials: adcp --save-auth <alias> ${agentUrl} --oauth`,
758
+ });
759
+ }
760
+ else {
761
+ observations.push({
762
+ category: 'auth',
763
+ severity: 'error',
764
+ message: 'Agent returned 401. Check your --auth token.',
765
+ });
766
+ }
767
+ }
768
+ return {
769
+ agent_url: agentUrl,
770
+ agent_profile: profile,
771
+ overall_status: (isAuthError ? 'auth_required' : 'unreachable'),
772
+ tracks: [],
773
+ tested_tracks: [],
774
+ skipped_tracks: [],
775
+ expected_tracks: [],
776
+ summary: {
777
+ tracks_passed: 0,
778
+ tracks_failed: 0,
779
+ tracks_skipped: 0,
780
+ tracks_partial: 0,
781
+ tracks_expected: 0,
782
+ headline,
783
+ },
784
+ observations,
785
+ storyboards_executed: [],
786
+ tested_at: new Date().toISOString(),
787
+ total_duration_ms: Date.now() - start,
788
+ dry_run: effectiveOptions.dry_run !== false,
789
+ };
790
+ }
770
791
  /**
771
792
  * Compute overall status for a reachable agent.
772
793
  * auth_required and unreachable are set directly in the early-exit path.
@@ -836,7 +857,11 @@ function formatComplianceResults(result) {
836
857
  if (result.platform_coherence) {
837
858
  output += `Platform: ${result.platform_coherence.label}\n`;
838
859
  }
839
- output += `Duration: ${(result.total_duration_ms / 1000).toFixed(1)}s\n\n`;
860
+ output += `Duration: ${(result.total_duration_ms / 1000).toFixed(1)}s\n`;
861
+ if (result.storyboards_executed?.length) {
862
+ output += `Storyboards: ${result.storyboards_executed.join(', ')}\n`;
863
+ }
864
+ output += '\n';
840
865
  // Summary line
841
866
  output += `${result.summary.headline}\n\n`;
842
867
  // Track results
@@ -882,6 +907,22 @@ function formatComplianceResults(result) {
882
907
  }
883
908
  }
884
909
  }
910
+ // Failures with fix guidance (show up to 5 with expected text)
911
+ const failuresWithExpected = (result.failures ?? []).filter(f => f.expected);
912
+ if (failuresWithExpected.length > 0) {
913
+ output += `\nHow to Fix\n`;
914
+ output += `${'─'.repeat(50)}\n`;
915
+ for (const f of failuresWithExpected.slice(0, 5)) {
916
+ output += `❌ ${f.storyboard_id}/${f.step_id} (${f.task})\n`;
917
+ if (f.error)
918
+ output += ` Error: ${f.error}\n`;
919
+ output += ` Expected: ${f.expected.split('\n')[0]}\n`;
920
+ output += ` Debug: ${f.fix_command}\n`;
921
+ }
922
+ if (failuresWithExpected.length > 5) {
923
+ output += ` ... and ${failuresWithExpected.length - 5} more (use --json for all)\n`;
924
+ }
925
+ }
885
926
  // Platform coherence
886
927
  if (result.platform_coherence) {
887
928
  const pc = result.platform_coherence;