npm - @adcp/client - Versions diffs - 4.22.1 → 4.24.0 - Mend

@adcp/client 4.22.1 → 4.24.0

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (144)

package/README.md +23 -9
package/bin/adcp.js +83 -18
package/dist/lib/index.d.ts +3 -5
package/dist/lib/index.d.ts.map +1 -1
package/dist/lib/index.js +16 -12
package/dist/lib/index.js.map +1 -1
package/dist/lib/server/index.d.ts +5 -1
package/dist/lib/server/index.d.ts.map +1 -1
package/dist/lib/server/index.js +10 -1
package/dist/lib/server/index.js.map +1 -1
package/dist/lib/server/postgres-task-store.d.ts +105 -0
package/dist/lib/server/postgres-task-store.d.ts.map +1 -0
package/dist/lib/server/postgres-task-store.js +267 -0
package/dist/lib/server/postgres-task-store.js.map +1 -0
package/dist/lib/server/responses.d.ts +1 -0
package/dist/lib/server/responses.d.ts.map +1 -1
package/dist/lib/server/responses.js +1 -0
package/dist/lib/server/responses.js.map +1 -1
package/dist/lib/server/test-controller.d.ts +88 -0
package/dist/lib/server/test-controller.d.ts.map +1 -0
package/dist/lib/server/test-controller.js +227 -0
package/dist/lib/server/test-controller.js.map +1 -0
package/dist/lib/testing/agent-tester.d.ts +1 -1
package/dist/lib/testing/agent-tester.d.ts.map +1 -1
package/dist/lib/testing/agent-tester.js +13 -1
package/dist/lib/testing/agent-tester.js.map +1 -1
package/dist/lib/testing/compliance/comply.d.ts +24 -5
package/dist/lib/testing/compliance/comply.d.ts.map +1 -1
package/dist/lib/testing/compliance/comply.js +318 -277
package/dist/lib/testing/compliance/comply.js.map +1 -1
package/dist/lib/testing/compliance/index.d.ts +2 -1
package/dist/lib/testing/compliance/index.d.ts.map +1 -1
package/dist/lib/testing/compliance/index.js +6 -1
package/dist/lib/testing/compliance/index.js.map +1 -1
package/dist/lib/testing/compliance/platform-storyboards.d.ts +44 -0
package/dist/lib/testing/compliance/platform-storyboards.d.ts.map +1 -0
package/dist/lib/testing/compliance/platform-storyboards.js +232 -0
package/dist/lib/testing/compliance/platform-storyboards.js.map +1 -0
package/dist/lib/testing/compliance/storyboard-tracks.d.ts +2 -9
package/dist/lib/testing/compliance/storyboard-tracks.d.ts.map +1 -1
package/dist/lib/testing/compliance/storyboard-tracks.js +15 -46
package/dist/lib/testing/compliance/storyboard-tracks.js.map +1 -1
package/dist/lib/testing/compliance/types.d.ts +22 -1
package/dist/lib/testing/compliance/types.d.ts.map +1 -1
package/dist/lib/testing/index.d.ts +1 -1
package/dist/lib/testing/index.d.ts.map +1 -1
package/dist/lib/testing/index.js +6 -1
package/dist/lib/testing/index.js.map +1 -1
package/dist/lib/testing/orchestrator.d.ts.map +1 -1
package/dist/lib/testing/orchestrator.js +5 -1
package/dist/lib/testing/orchestrator.js.map +1 -1
package/dist/lib/testing/scenarios/brand-rights.d.ts +19 -1
package/dist/lib/testing/scenarios/brand-rights.d.ts.map +1 -1
package/dist/lib/testing/scenarios/brand-rights.js +138 -1
package/dist/lib/testing/scenarios/brand-rights.js.map +1 -1
package/dist/lib/testing/scenarios/deterministic.js +7 -7
package/dist/lib/testing/scenarios/deterministic.js.map +1 -1
package/dist/lib/testing/scenarios/index.d.ts +1 -1
package/dist/lib/testing/scenarios/index.d.ts.map +1 -1
package/dist/lib/testing/scenarios/index.js +4 -2
package/dist/lib/testing/scenarios/index.js.map +1 -1
package/dist/lib/testing/scenarios/media-buy.js +4 -4
package/dist/lib/testing/scenarios/media-buy.js.map +1 -1
package/dist/lib/testing/storyboard/loader.d.ts +1 -0
package/dist/lib/testing/storyboard/loader.d.ts.map +1 -1
package/dist/lib/testing/storyboard/loader.js +14 -0
package/dist/lib/testing/storyboard/loader.js.map +1 -1
package/dist/lib/testing/storyboard/request-builder.d.ts.map +1 -1
package/dist/lib/testing/storyboard/request-builder.js +88 -11
package/dist/lib/testing/storyboard/request-builder.js.map +1 -1
package/dist/lib/testing/storyboard/runner.d.ts.map +1 -1
package/dist/lib/testing/storyboard/runner.js +83 -5
package/dist/lib/testing/storyboard/runner.js.map +1 -1
package/dist/lib/testing/storyboard/task-map.d.ts +2 -0
package/dist/lib/testing/storyboard/task-map.d.ts.map +1 -1
package/dist/lib/testing/storyboard/task-map.js +23 -9
package/dist/lib/testing/storyboard/task-map.js.map +1 -1
package/dist/lib/testing/storyboard/types.d.ts +6 -2
package/dist/lib/testing/storyboard/types.d.ts.map +1 -1
package/dist/lib/testing/storyboard/validations.d.ts.map +1 -1
package/dist/lib/testing/storyboard/validations.js +21 -4
package/dist/lib/testing/storyboard/validations.js.map +1 -1
package/dist/lib/testing/types.d.ts +1 -1
package/dist/lib/testing/types.d.ts.map +1 -1
package/dist/lib/types/core.generated.d.ts +242 -3
package/dist/lib/types/core.generated.d.ts.map +1 -1
package/dist/lib/types/core.generated.js +1 -1
package/dist/lib/types/schemas.generated.d.ts +3697 -3468
package/dist/lib/types/schemas.generated.d.ts.map +1 -1
package/dist/lib/types/schemas.generated.js +226 -118
package/dist/lib/types/schemas.generated.js.map +1 -1
package/dist/lib/types/tools.generated.d.ts +281 -79
package/dist/lib/types/tools.generated.d.ts.map +1 -1
package/dist/lib/utils/capabilities.d.ts +2 -2
package/dist/lib/utils/capabilities.d.ts.map +1 -1
package/dist/lib/utils/capabilities.js +9 -3
package/dist/lib/utils/capabilities.js.map +1 -1
package/dist/lib/utils/response-schemas.d.ts.map +1 -1
package/dist/lib/utils/response-schemas.js +9 -0
package/dist/lib/utils/response-schemas.js.map +1 -1
package/dist/lib/version.d.ts +3 -3
package/dist/lib/version.js +3 -3
package/docs/llms.txt +56 -32
package/package.json +8 -2
package/skills/adcp/SKILL.md +118 -33
package/skills/build-creative-agent/SKILL.md +221 -0
package/skills/build-generative-seller-agent/SKILL.md +288 -0
package/skills/build-retail-media-agent/SKILL.md +237 -0
package/skills/build-seller-agent/SKILL.md +313 -0
package/skills/build-signals-agent/SKILL.md +203 -0
package/storyboards/audience_sync.yaml +18 -29
package/storyboards/behavioral_analysis.yaml +40 -72
package/storyboards/brand_rights.yaml +172 -75
package/storyboards/campaign_governance_conditions.yaml +187 -0
package/storyboards/campaign_governance_delivery.yaml +231 -0
package/storyboards/campaign_governance_denied.yaml +136 -0
package/storyboards/capability_discovery.yaml +106 -0
package/storyboards/content_standards.yaml +251 -0
package/storyboards/creative_ad_server.yaml +108 -16
package/storyboards/creative_generative.yaml +317 -0
package/storyboards/creative_lifecycle.yaml +284 -0
package/storyboards/creative_sales_agent.yaml +2 -6
package/storyboards/creative_template.yaml +3 -6
package/storyboards/deterministic_testing.yaml +271 -245
package/storyboards/error_compliance.yaml +105 -108
package/storyboards/media_buy_catalog_creative.yaml +8 -5
package/storyboards/media_buy_generative_seller.yaml +581 -0
package/storyboards/media_buy_governance_escalation.yaml +10 -6
package/storyboards/media_buy_guaranteed_approval.yaml +21 -19
package/storyboards/media_buy_non_guaranteed.yaml +9 -8
package/storyboards/media_buy_proposal_mode.yaml +12 -11
package/storyboards/media_buy_seller.yaml +161 -173
package/storyboards/media_buy_state_machine.yaml +102 -101
package/storyboards/property_governance.yaml +239 -0
package/storyboards/schema.yaml +3 -2
package/storyboards/schema_validation.yaml +58 -51
package/storyboards/si_session.yaml +99 -317
package/storyboards/signal_marketplace.yaml +9 -5
package/storyboards/signal_owned.yaml +6 -5
package/storyboards/social_platform.yaml +274 -0
package/storyboards/test-kits/acme-outdoor.yaml +118 -0
package/storyboards/test-kits/nova-motors.yaml +134 -0
package/storyboards/governance_content_standards.yaml +0 -213
package/storyboards/governance_property_lists.yaml +0 -372

package/dist/lib/testing/compliance/comply.js (changed)

@@ -2,8 +2,10 @@
 /**
  * Compliance Engine
  *
- * Runs all applicable capability tracks against an agent
- * and reports results for every track — never stops at the first failure.
+ * Storyboard-driven compliance assessment. Storyboards are the routing
+ * mechanism; tracks are a reporting layer derived from storyboard results.
+ *
+ * Resolution priority: storyboards > platform_type > all applicable.
  */
 var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
     if (k2 === undefined) k2 = k;
@@ -45,98 +47,15 @@ exports.formatComplianceResults = formatComplianceResults;
 exports.formatComplianceResultsJSON = formatComplianceResultsJSON;
 const client_1 = require("../client");
 const storyboard_tracks_1 = require("./storyboard-tracks");
+const runner_1 = require("../storyboard/runner");
+const loader_1 = require("../storyboard/loader");
+const platform_storyboards_1 = require("./platform-storyboards");
 const profiles_1 = require("./profiles");
 const mcp_1 = require("../../protocols/mcp");
 const test_controller_1 = require("../test-controller");
 /**
- * Maps each track to its constituent scenarios and a human-readable label.
- */
-const TRACK_DEFINITIONS = {
-    core: {
-        label: 'Core Protocol',
-        scenarios: [
-            'health_check',
-            'discovery',
-            'capability_discovery',
-            'schema_compliance',
-            'controller_validation',
-            'deterministic_account',
-        ],
-    },
-    products: {
-        label: 'Product Discovery',
-        scenarios: ['pricing_edge_cases', 'behavior_analysis', 'response_consistency'],
-    },
-    media_buy: {
-        label: 'Media Buy Lifecycle',
-        scenarios: [
-            'create_media_buy',
-            'full_sales_flow',
-            'creative_inline',
-            'temporal_validation',
-            'media_buy_lifecycle',
-            'terminal_state_enforcement',
-            'package_lifecycle',
-            'seller_governance_context',
-            'deterministic_media_buy',
-            'deterministic_budget',
-        ],
-    },
-    creative: {
-        label: 'Creative Management',
-        scenarios: ['creative_sync', 'creative_flow', 'deterministic_creative'],
-    },
-    reporting: {
-        label: 'Reporting',
-        scenarios: ['reporting_flow', 'deterministic_delivery'],
-    },
-    governance: {
-        label: 'Governance',
-        scenarios: ['governance_property_lists', 'governance_content_standards', 'property_list_filters'],
-    },
-    campaign_governance: {
-        label: 'Campaign Governance',
-        scenarios: [
-            'campaign_governance',
-            'campaign_governance_denied',
-            'campaign_governance_conditions',
-            'campaign_governance_delivery',
-        ],
-    },
-    signals: {
-        label: 'Signals',
-        scenarios: ['signals_flow'],
-    },
-    si: {
-        label: 'Sponsored Intelligence',
-        scenarios: ['si_session_lifecycle', 'si_availability', 'si_handoff', 'deterministic_session'],
-    },
-    audiences: {
-        label: 'Audience Management',
-        scenarios: ['sync_audiences'],
-    },
-    error_handling: {
-        label: 'Error Compliance',
-        scenarios: ['error_codes', 'error_structure', 'error_transport'],
-    },
-};
-/**
- * Which tools make a track "applicable" — if the agent has at least one
- * of these tools, the track should be attempted.
+ * All compliance tracks in display order.
  */
-const TRACK_RELEVANCE = {
-    core: [], // always applicable
-    products: ['get_products'],
-    media_buy: ['create_media_buy', 'update_media_buy', 'get_media_buys'],
-    creative: ['sync_creatives', 'build_creative', 'list_creative_formats'],
-    reporting: ['get_media_buy_delivery'],
-    governance: ['create_property_list', 'list_content_standards'],
-    campaign_governance: ['sync_plans', 'check_governance'],
-    signals: ['get_signals'],
-    si: ['si_initiate_session'],
-    audiences: ['sync_audiences'],
-    error_handling: ['create_media_buy'],
-};
 const TRACK_ORDER = [
     'core',
     'products',
@@ -149,52 +68,8 @@ const TRACK_ORDER = [
     'si',
     'audiences',
     'error_handling',
+    'brand',
 ];
-function isTrackApplicable(track, tools) {
-    const relevantTools = TRACK_RELEVANCE[track];
-    if (relevantTools.length === 0)
-        return true;
-    return relevantTools.some(t => tools.includes(t));
-}
-function isAuthError(step) {
-    if (!step.error || step.passed)
-        return false;
-    const e = step.error.toLowerCase();
-    return (e.includes('authentication') ||
-        e.includes('x-adcp-auth') ||
-        e.includes('unauthorized') ||
-        e.includes('missing auth') ||
-        e.includes('401'));
-}
-/**
- * Check if a scenario failed entirely due to auth errors.
- * Returns true if every failed step is an auth error.
- */
-function isAuthOnlyFailure(result) {
-    if (result.overall_passed)
-        return false;
-    const failedSteps = (result.steps ?? []).filter(s => !s.passed);
-    return failedSteps.length > 0 && failedSteps.every(isAuthError);
-}
-function computeTrackStatus(results, skippedCount, hasAuth) {
-    if (results.length === 0)
-        return 'skip';
-    // When running without auth, scenarios that failed only due to auth
-    // don't count as failures
-    const effectiveResults = results.map(r => {
-        if (!hasAuth && isAuthOnlyFailure(r)) {
-            return { ...r, _authSkipped: true, overall_passed: true };
-        }
-        return r;
-    });
-    const passed = effectiveResults.filter(r => r.overall_passed).length;
-    const total = effectiveResults.length;
-    if (passed === total)
-        return 'pass';
-    if (passed === 0)
-        return 'fail';
-    return 'partial';
-}
 /**
  * Collect advisory observations from test results.
  * Analyzes the actual data for quality signals that aren't pass/fail.
@@ -264,7 +139,6 @@ function collectObservations(track, results, profile) {
     // Media buy track observations
     if (track === 'media_buy') {
         // Check for valid_actions support (first match only)
-        // Only steps with observation_data are considered — snapshot-only steps don't set it.
         let checkedValidActions = false;
         for (const result of results) {
             if (checkedValidActions)
@@ -281,7 +155,6 @@ function collectObservations(track, results, profile) {
                                 'Without valid_actions, buyer agents must hardcode the state machine to know what operations are permitted.',
                         });
                     }
-                    // Check creative_deadline support
                     if (obs.has_creative_deadline === false) {
                         observations.push({
                             category: 'best_practice',
@@ -291,7 +164,6 @@ function collectObservations(track, results, profile) {
                                 'Buyers need to know when creative uploads must be finalized to avoid rejected submissions.',
                         });
                     }
-                    // Check history entry shape when present
                     if (obs.history_entries && obs.history_entries > 0 && obs.history_valid === false) {
                         observations.push({
                             category: 'best_practice',
@@ -301,7 +173,6 @@ function collectObservations(track, results, profile) {
                                 'History entries must include at least timestamp and action to be useful for audit.',
                         });
                     }
-                    // Check dry_run/sandbox confirmation
                     if (obs.sandbox === undefined || obs.sandbox === null) {
                         observations.push({
                             category: 'best_practice',
@@ -387,6 +258,15 @@ function collectObservations(track, results, profile) {
                                     'Buyers need to distinguish buyer-initiated from seller-initiated cancellations.',
                             });
                         }
+                        if (!obs.canceled_at) {
+                            observations.push({
+                                category: 'completeness',
+                                severity: 'warning',
+                                track,
+                                message: 'Agent transitions to canceled status but does not include canceled_at timestamp. ' +
+                                    'A cancellation timestamp is required for audit and reconciliation.',
+                            });
+                        }
                         checkedCancellation = true;
                     }
                 }
@@ -484,7 +364,16 @@ function collectObservations(track, results, profile) {
 }
 /**
  * Run compliance assessment against an agent.
- * Assesses all applicable tracks independently — never stops at first failure.
+ * Assesses all applicable storyboards and reports results grouped by track.
+ *
+ * Resolution priority:
+ * 1. options.storyboards — run exactly these storyboard IDs
+ * 2. options.platform_type (when tracks is not set) — resolve via PLATFORM_STORYBOARDS
+ * 3. options.tracks — run all storyboards for these tracks
+ * 4. Default — run all applicable storyboards
+ *
+ * When platform_type is set, it always drives coherence checking regardless
+ * of how the storyboard pool was resolved.
  */
 async function comply(agentUrl, options = {}) {
     try {
@@ -494,10 +383,154 @@ async function comply(agentUrl, options = {}) {
         await (0, mcp_1.closeMCPConnections)();
     }
 }
+// ────────────────────────────────────────────────────────────
+// Storyboard resolution
+// ────────────────────────────────────────────────────────────
+/**
+ * Resolve the storyboard pool based on options.
+ * Priority: storyboards > platform_type (when tracks is not set) > tracks > all bundled.
+ */
+function resolveStoryboards(options) {
+    // Explicit storyboard IDs — highest priority
+    if (options.storyboards?.length) {
+        const resolved = [];
+        for (const id of options.storyboards) {
+            const sb = (0, loader_1.getStoryboardById)(id);
+            if (!sb) {
+                throw new Error(`Unknown storyboard ID: "${id}". Use listStoryboards() to see available IDs.`);
+            }
+            resolved.push(sb);
+        }
+        return resolved;
+    }
+    // Platform type — resolve via PLATFORM_STORYBOARDS
+    if (options.platform_type && !options.tracks) {
+        const pt = options.platform_type;
+        const ids = platform_storyboards_1.PLATFORM_STORYBOARDS[pt];
+        if (ids) {
+            const resolved = [];
+            for (const id of ids) {
+                const sb = (0, loader_1.getStoryboardById)(id);
+                if (sb) {
+                    resolved.push(sb);
+                }
+                else {
+                    // Data integrity issue — storyboard declared in PLATFORM_STORYBOARDS
+                    // but not found in bundled set. This is a packaging bug.
+                    console.warn(`PLATFORM_STORYBOARDS[${pt}] references unknown storyboard "${id}"`);
+                }
+            }
+            // Also include universal storyboards (no platform_types) not already in the set
+            const resolvedIds = new Set(resolved.map(s => s.id));
+            for (const sb of (0, loader_1.loadBundledStoryboards)()) {
+                if (!sb.track)
+                    continue;
+                if (resolvedIds.has(sb.id))
+                    continue;
+                if (!sb.platform_types?.length) {
+                    resolved.push(sb);
+                }
+            }
+            return resolved;
+        }
+    }
+    // Track filter — run storyboards whose track field matches
+    if (options.tracks?.length) {
+        const trackSet = new Set(options.tracks);
+        return (0, loader_1.loadBundledStoryboards)().filter(sb => sb.track && trackSet.has(sb.track));
+    }
+    // Default — all compliance storyboards (those with a track field)
+    return (0, loader_1.loadBundledStoryboards)().filter(sb => sb.track);
+}
+/**
+ * Filter storyboards to those applicable for the agent's tools.
+ * A storyboard is applicable if the agent has at least one of its required_tools,
+ * or if it has no required_tools at all.
+ */
+function filterApplicable(storyboards, agentTools) {
+    return storyboards.filter(sb => {
+        if (!sb.required_tools?.length)
+            return true;
+        return sb.required_tools.some(tool => agentTools.includes(tool));
+    });
+}
+/**
+ * Group storyboard results by track.
+ */
+function groupByTrack(results, storyboards) {
+    // Build a storyboard ID → track lookup
+    const trackLookup = new Map();
+    for (const sb of storyboards) {
+        if (sb.track) {
+            trackLookup.set(sb.id, sb.track);
+        }
+    }
+    const grouped = new Map();
+    for (const result of results) {
+        const track = trackLookup.get(result.storyboard_id);
+        if (!track)
+            continue;
+        if (!grouped.has(track))
+            grouped.set(track, []);
+        grouped.get(track).push(result);
+    }
+    return grouped;
+}
+// ────────────────────────────────────────────────────────────
+// Failure extraction
+// ────────────────────────────────────────────────────────────
+/**
+ * Extract a flat list of failures from raw storyboard results.
+ * Preserves step_id and expected text from the storyboard YAML,
+ * and includes a fix_command for targeted re-running.
+ */
+function extractFailures(results, storyboards, agentRef) {
+    const failures = [];
+    // Build storyboard lookup for track and expected text
+    const sbLookup = new Map();
+    for (const sb of storyboards) {
+        sbLookup.set(sb.id, sb);
+    }
+    for (const result of results) {
+        const sb = sbLookup.get(result.storyboard_id);
+        const track = sb?.track ?? 'core';
+        for (const phase of result.phases) {
+            for (const step of phase.steps) {
+                if (step.passed || step.skipped)
+                    continue;
+                // Find the step definition in the storyboard for expected text
+                let expected;
+                if (sb) {
+                    for (const p of sb.phases) {
+                        const stepDef = p.steps.find(s => s.id === step.step_id);
+                        if (stepDef?.expected) {
+                            expected = stepDef.expected.trim();
+                            break;
+                        }
+                    }
+                }
+                failures.push({
+                    track,
+                    storyboard_id: result.storyboard_id,
+                    step_id: step.step_id,
+                    step_title: step.title,
+                    task: step.task,
+                    error: step.error,
+                    expected,
+                    fix_command: `adcp storyboard step ${agentRef} ${result.storyboard_id} ${step.step_id} --json`,
+                });
+            }
+        }
+    }
+    return failures;
+}
+// ────────────────────────────────────────────────────────────
+// Core implementation
+// ────────────────────────────────────────────────────────────
 async function complyImpl(agentUrl, options) {
     const start = Date.now();
-    const { tracks: trackFilter, platform_type, timeout_ms, signal: externalSignal, ...testOptions } = options;
-    // Validate platform_type if provided (issue #402: accept string, validate internally)
+    const { storyboards: _storyboardIds, tracks: _trackFilter, platform_type, timeout_ms, signal: externalSignal, ...testOptions } = options;
+    // Validate platform_type if provided
     let platformProfile;
     if (platform_type) {
         const validTypes = (0, profiles_1.getAllPlatformTypes)();
@@ -537,14 +570,14 @@ async function complyImpl(agentUrl, options) {
         };
         // Check for abort before starting
         signal?.throwIfAborted();
-        // Collect observations across all tracks (declared early for tool discovery diagnostics)
+        // Collect observations across all tracks
         const allObservations = [];
-        // Discover agent capabilities once and share across all scenarios
+        // Discover agent capabilities once and share across all storyboards
         const client = (0, client_1.createTestClient)(agentUrl, effectiveOptions.protocol ?? 'mcp', effectiveOptions);
         const { profile, step: profileStep } = await (0, client_1.discoverAgentProfile)(client);
         effectiveOptions._client = client;
         effectiveOptions._profile = profile;
-        // Log discovered tools for diagnostic purposes
+        // Log discovered tools
         if (profileStep.passed) {
             allObservations.push({
                 category: 'tool_discovery',
@@ -562,145 +595,62 @@ async function complyImpl(agentUrl, options) {
             }
         }
         if (!profileStep.passed) {
-            const errorMsg = profileStep.error || 'Unknown error';
-            const observations = [];
-            // Check for auth errors — either explicit 401/Unauthorized or MCP SDK's generic
-            // "Failed to discover" which often wraps a 401
-            const isExplicitAuthError = errorMsg.includes('401') ||
-                errorMsg.includes('Unauthorized') ||
-                errorMsg.includes('unauthorized') ||
-                errorMsg.includes('authentication') ||
-                errorMsg.includes('JWS') ||
-                errorMsg.includes('JWT') ||
-                errorMsg.includes('signature verification');
-            // When MCP SDK wraps the error, probe the endpoint directly
-            let isAuthError = isExplicitAuthError;
-            if (!isAuthError && errorMsg.includes('Failed to discover')) {
-                try {
-                    const probe = await fetch(agentUrl, {
-                        method: 'POST',
-                        headers: { 'Content-Type': 'application/json' },
-                        signal,
-                    });
-                    if (probe.status === 401 || probe.status === 403) {
-                        isAuthError = true;
-                    }
-                }
-                catch {
-                    // Network error — not an auth issue
-                }
-            }
-            const headline = isAuthError ? `Authentication required` : `Agent unreachable — ${errorMsg}`;
-            if (isAuthError) {
-                // Check if agent supports OAuth
-                const { discoverOAuthMetadata } = await Promise.resolve().then(() => __importStar(require('../../auth/oauth/discovery')));
-                const oauthMeta = await discoverOAuthMetadata(agentUrl);
-                if (oauthMeta) {
-                    observations.push({
-                        category: 'auth',
-                        severity: 'error',
-                        message: `Agent requires OAuth (issuer: ${oauthMeta.issuer || 'unknown'}). Save credentials: adcp --save-auth <alias> ${agentUrl} --oauth`,
-                    });
-                }
-                else {
-                    observations.push({
-                        category: 'auth',
-                        severity: 'error',
-                        message: 'Agent returned 401. Check your --auth token.',
-                    });
-                }
-            }
-            return {
-                agent_url: agentUrl,
-                agent_profile: profile,
-                overall_status: (isAuthError ? 'auth_required' : 'unreachable'),
-                tracks: [],
-                tested_tracks: [],
-                skipped_tracks: [],
-                expected_tracks: [],
-                summary: {
-                    tracks_passed: 0,
-                    tracks_failed: 0,
-                    tracks_skipped: 0,
-                    tracks_partial: 0,
-                    tracks_expected: 0,
-                    headline,
-                },
-                observations,
-                tested_at: new Date().toISOString(),
-                total_duration_ms: Date.now() - start,
-                dry_run: effectiveOptions.dry_run !== false,
-            };
+            return buildUnreachableResult(agentUrl, profile, profileStep.error, start, effectiveOptions, signal);
         }
-        const tracksToRun = trackFilter ?? TRACK_ORDER;
-        const trackResults = [];
-        for (const track of tracksToRun) {
-            // Check for abort between tracks
+        // Resolve and filter storyboard pool
+        const allStoryboards = resolveStoryboards(options);
+        const applicableStoryboards = filterApplicable(allStoryboards, profile.tools);
+        // Run storyboards
+        const storyboardResults = [];
+        const runOptions = {
+            ...effectiveOptions,
+            agentTools: profile.tools,
+        };
+        for (const sb of applicableStoryboards) {
             signal?.throwIfAborted();
-            const def = TRACK_DEFINITIONS[track];
-            if (!def)
+            const result = await (0, runner_1.runStoryboard)(agentUrl, sb, runOptions);
+            storyboardResults.push(result);
+        }
+        // Group results by track and build TrackResults
+        const grouped = groupByTrack(storyboardResults, applicableStoryboards);
+        const trackResults = [];
+        // Determine which tracks had storyboards in the pool (even if filtered out by tools)
+        const poolTrackSet = new Set();
+        for (const sb of allStoryboards) {
+            if (sb.track)
+                poolTrackSet.add(sb.track);
+        }
+        for (const track of TRACK_ORDER) {
+            if (!poolTrackSet.has(track))
                 continue;
-            if (!isTrackApplicable(track, profile.tools)) {
+            const results = grouped.get(track) ?? [];
+            if (results.length > 0) {
+                const trackResult = (0, storyboard_tracks_1.mapStoryboardResultsToTrackResult)(track, results, profile);
+                const observations = collectObservations(track, trackResult.scenarios, profile);
+                trackResult.observations = observations;
+                allObservations.push(...observations);
+                trackResults.push(trackResult);
+            }
+            else {
+                // Track was in the pool but no storyboards ran (agent lacks tools)
                 const isExpected = track !== 'core' && (platformProfile?.expected_tracks.includes(track) ?? false);
-                const requiredTools = TRACK_RELEVANCE[track];
-                const trackObservations = [];
-                if (requiredTools.length > 0) {
-                    trackObservations.push({
-                        category: 'tool_discovery',
-                        severity: isExpected ? 'warning' : 'info',
-                        message: `Track "${track}" skipped: agent does not advertise any of [${requiredTools.join(', ')}]. ` +
-                            `Agent tools: [${profile.tools.join(', ')}]`,
-                        evidence: { expected_tools: requiredTools, agent_tool_count: profile.tools.length },
-                    });
-                }
-                allObservations.push(...trackObservations);
                 trackResults.push({
                     track,
                     status: isExpected ? 'expected' : 'skip',
-                    label: def.label,
-                    scenarios: [],
-                    skipped_scenarios: def.scenarios,
-                    observations: trackObservations,
-                    duration_ms: 0,
-                });
-                continue;
-            }
-            const trackStart = Date.now();
-            // Run compliance storyboards for this track
-            const storyboardResults = await (0, storyboard_tracks_1.runTrackStoryboards)(agentUrl, track, profile.tools, {
-                ...effectiveOptions,
-                agentTools: profile.tools,
-            });
-            let trackResult;
-            if (storyboardResults.length > 0) {
-                // Map storyboard results to TrackResult for backwards compat
-                trackResult = (0, storyboard_tracks_1.mapStoryboardResultsToTrackResult)(track, storyboardResults, profile);
-            }
-            else {
-                // No storyboards for this track — skip
-                trackResult = {
-                    track,
-                    status: 'skip',
-                    label: def.label,
+                    label: storyboard_tracks_1.TRACK_LABELS[track] || track,
                     scenarios: [],
                     skipped_scenarios: [],
                     observations: [],
                     duration_ms: 0,
-                };
+                });
             }
-            // Collect observations from track results and agent profile
-            const observations = collectObservations(track, trackResult.scenarios, profile);
-            trackResult.observations = observations;
-            trackResult.duration_ms = Date.now() - trackStart;
-            allObservations.push(...observations);
-            trackResults.push(trackResult);
         }
         // Build platform coherence result if platform type was declared
         let platformCoherence;
         if (platformProfile) {
             const findings = platformProfile.checkCoherence(profile);
-            const missingTracks = platformProfile.expected_tracks.filter(t => !isTrackApplicable(t, profile.tools) && t !== 'core');
-            // Add coherence findings as observations
+            const applicableTrackSet = new Set(trackResults.filter(t => t.status !== 'skip' && t.status !== 'expected').map(t => t.track));
+            const missingTracks = platformProfile.expected_tracks.filter(t => !applicableTrackSet.has(t) && t !== 'core');
             for (const finding of findings) {
                 allObservations.push({
                     category: 'coherence',
@@ -720,18 +670,14 @@ async function complyImpl(agentUrl, options) {
             };
         }
         const summary = buildSummary(trackResults);
-        // Partition tracks by disposition (issue #403)
         const testedTracks = trackResults.filter(t => t.status === 'pass' || t.status === 'fail' || t.status === 'partial');
         const skippedTracks = trackResults
             .filter(t => t.status === 'skip')
-            .map(t => {
-            const required = TRACK_RELEVANCE[t.track];
-            return {
-                track: t.track,
-                label: t.label,
-                reason: required.length > 0 ? `Agent lacks required tools: ${required.join(', ')}` : 'Agent lacks required tools',
-            };
-        });
+            .map(t => ({
+            track: t.track,
+            label: t.label,
+            reason: 'Agent lacks required tools for applicable storyboards',
+        }));
         const expectedTracks = trackResults
             .filter(t => t.status === 'expected')
             .map(t => ({
@@ -739,8 +685,10 @@ async function complyImpl(agentUrl, options) {
             label: t.label,
             reason: `Expected for ${platformCoherence?.label ?? 'declared platform type'}`,
         }));
-        // Compute overall status (issue #401)
         const overallStatus = computeOverallStatus(summary);
+        // Build flat failures array from raw storyboard results (preserves step_id and expected)
+        const agentRef = options.agent_alias || agentUrl;
+        const failures = extractFailures(storyboardResults, applicableStoryboards, agentRef);
         return {
             agent_url: agentUrl,
             agent_profile: profile,
@@ -751,7 +699,9 @@ async function complyImpl(agentUrl, options) {
             expected_tracks: expectedTracks,
             summary,
             observations: allObservations,
+            failures: failures.length > 0 ? failures : undefined,
             platform_coherence: platformCoherence,
+            storyboards_executed: applicableStoryboards.map(sb => sb.id),
             controller_detected: controllerDetection.detected,
             controller_scenarios: controllerDetection.detected ? controllerDetection.scenarios : undefined,
             tested_at: new Date().toISOString(),
@@ -767,6 +717,77 @@ async function complyImpl(agentUrl, options) {
         }
     }
 }
+/**
+ * Build result for an unreachable or auth-required agent.
+ *
+ * Decides whether the failure described by `errorMsg` is an authentication
+ * problem or a plain connectivity problem, and returns a result object with
+ * empty track lists and zeroed summary counters. `overall_status` is
+ * 'auth_required' or 'unreachable' accordingly.
+ *
+ * Auth detection happens in two stages:
+ *   1. Case-sensitive substring checks on the error text ('401',
+ *      'Unauthorized', 'unauthorized', 'authentication', 'JWS', 'JWT',
+ *      'signature verification').
+ *   2. Only if stage 1 found nothing and the text contains
+ *      'Failed to discover': a body-less POST probe to `agentUrl`; an HTTP
+ *      401 or 403 response is treated as an auth failure.
+ *
+ * On the auth path a single 'auth' observation with severity 'error' is
+ * added. Its wording depends on whether OAuth metadata could be discovered
+ * for `agentUrl`.
+ *
+ * @param agentUrl - Agent endpoint; echoed back as `agent_url`, used as the
+ *   probe target and passed to OAuth metadata discovery.
+ * @param profile - Returned unchanged as `agent_profile`.
+ * @param errorMsg - Error text from the failed attempt; any falsy value is
+ *   replaced by 'Unknown error'.
+ * @param start - Value subtracted from `Date.now()` to compute
+ *   `total_duration_ms` (presumably an epoch-millisecond start time; confirm
+ *   against the caller).
+ * @param effectiveOptions - Only `dry_run` is read here.
+ * @param signal - Forwarded to the probe `fetch` as its `signal` option.
+ * @returns A promise for the result object described above.
+ *
+ * NOTE(review): the OAuth discovery import and call are not wrapped in a
+ * try/catch, so a rejection there propagates to the caller instead of
+ * producing an 'auth_required' result.
+ */
+async function buildUnreachableResult(agentUrl, profile, errorMsg, start, effectiveOptions, signal) {
+    const err = errorMsg || 'Unknown error'; // any falsy errorMsg (including '') becomes the placeholder
+    const observations = [];
+    const isExplicitAuthError = err.includes('401') || // substring checks are case-sensitive, hence both spellings of "unauthorized"
+        err.includes('Unauthorized') ||
+        err.includes('unauthorized') ||
+        err.includes('authentication') ||
+        err.includes('JWS') ||
+        err.includes('JWT') ||
+        err.includes('signature verification');
+    let isAuthError = isExplicitAuthError;
+    if (!isAuthError && err.includes('Failed to discover')) { // discovery failure with no auth hint in the text: probe the endpoint directly
+        try {
+            const probe = await fetch(agentUrl, { // POST with a JSON content type but no body; only the status code is inspected
+                method: 'POST',
+                headers: { 'Content-Type': 'application/json' },
+                signal,
+            });
+            if (probe.status === 401 || probe.status === 403) {
+                isAuthError = true;
+            }
+        }
+        catch { // any rejection from fetch lands here and leaves isAuthError false
+            // Network error — not an auth issue
+        }
+    }
+    const headline = isAuthError ? `Authentication required` : `Agent unreachable — ${err}`;
+    if (isAuthError) {
+        const { discoverOAuthMetadata } = await Promise.resolve().then(() => __importStar(require('../../auth/oauth/discovery'))); // module is loaded lazily, only on the auth path
+        const oauthMeta = await discoverOAuthMetadata(agentUrl);
+        if (oauthMeta) { // truthy metadata: point the user at the OAuth credential flow
+            observations.push({
+                category: 'auth',
+                severity: 'error',
+                message: `Agent requires OAuth (issuer: ${oauthMeta.issuer || 'unknown'}). Save credentials: adcp --save-auth <alias> ${agentUrl} --oauth`,
+            });
+        }
+        else { // NOTE(review): this message says 401 even when detection came from a 403 probe or a JWS/JWT text match
+            observations.push({
+                category: 'auth',
+                severity: 'error',
+                message: 'Agent returned 401. Check your --auth token.',
+            });
+        }
+    }
+    return {
+        agent_url: agentUrl,
+        agent_profile: profile,
+        overall_status: (isAuthError ? 'auth_required' : 'unreachable'),
+        tracks: [], // no tracks were run, so every track list below is empty
+        tested_tracks: [],
+        skipped_tracks: [],
+        expected_tracks: [],
+        summary: {
+            tracks_passed: 0,
+            tracks_failed: 0,
+            tracks_skipped: 0,
+            tracks_partial: 0,
+            tracks_expected: 0,
+            headline,
+        },
+        observations,
+        storyboards_executed: [],
+        tested_at: new Date().toISOString(),
+        total_duration_ms: Date.now() - start,
+        dry_run: effectiveOptions.dry_run !== false, // reported as true unless dry_run is explicitly false
+    };
+}
 /**
  * Compute overall status for a reachable agent.
  * auth_required and unreachable are set directly in the early-exit path.
@@ -836,7 +857,11 @@ function formatComplianceResults(result) {
     if (result.platform_coherence) {
         output += `Platform: ${result.platform_coherence.label}\n`;
     }
-    output += `Duration: ${(result.total_duration_ms / 1000).toFixed(1)}s\n\n`;
+    output += `Duration: ${(result.total_duration_ms / 1000).toFixed(1)}s\n`;
+    if (result.storyboards_executed?.length) {
+        output += `Storyboards: ${result.storyboards_executed.join(', ')}\n`;
+    }
+    output += '\n';
     // Summary line
     output += `${result.summary.headline}\n\n`;
     // Track results
@@ -882,6 +907,22 @@ function formatComplianceResults(result) {
             }
         }
     }
+    // Failures with fix guidance (show up to 5 with expected text)
+    const failuresWithExpected = (result.failures ?? []).filter(f => f.expected);
+    if (failuresWithExpected.length > 0) {
+        output += `\nHow to Fix\n`;
+        output += `${'─'.repeat(50)}\n`;
+        for (const f of failuresWithExpected.slice(0, 5)) {
+            output += `❌ ${f.storyboard_id}/${f.step_id} (${f.task})\n`;
+            if (f.error)
+                output += `   Error: ${f.error}\n`;
+            output += `   Expected: ${f.expected.split('\n')[0]}\n`;
+            output += `   Debug: ${f.fix_command}\n`;
+        }
+        if (failuresWithExpected.length > 5) {
+            output += `   ... and ${failuresWithExpected.length - 5} more (use --json for all)\n`;
+        }
+    }
     // Platform coherence
     if (result.platform_coherence) {
         const pc = result.platform_coherence;