npm - agileflow - Versions diffs - 3.4.0 → 3.4.1 - Mend

agileflow 3.4.0 → 3.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (112) hide show

package/CHANGELOG.md +5 -0
package/README.md +4 -4
package/package.json +1 -1
package/scripts/agileflow-welcome.js +79 -0
package/scripts/claude-tmux.sh +12 -36
package/scripts/lib/ac-test-matcher.js +452 -0
package/scripts/lib/audit-registry.js +58 -2
package/scripts/lib/configure-features.js +35 -0
package/scripts/lib/model-profiles.js +25 -5
package/scripts/lib/quality-gates.js +163 -0
package/scripts/lib/signal-detectors.js +43 -0
package/scripts/lib/status-writer.js +255 -0
package/scripts/lib/story-claiming.js +128 -45
package/scripts/lib/task-sync.js +32 -38
package/scripts/lib/tmux-audit-monitor.js +611 -0
package/scripts/lib/tool-registry.yaml +241 -0
package/scripts/lib/tool-shed.js +441 -0
package/scripts/native-team-observer.js +219 -0
package/scripts/obtain-context.js +14 -0
package/scripts/ralph-loop.js +30 -5
package/scripts/smart-detect.js +21 -0
package/scripts/spawn-audit-sessions.js +372 -44
package/scripts/team-manager.js +19 -0
package/src/core/agents/a11y-analyzer-aria.md +155 -0
package/src/core/agents/a11y-analyzer-forms.md +162 -0
package/src/core/agents/a11y-analyzer-keyboard.md +175 -0
package/src/core/agents/a11y-analyzer-semantic.md +153 -0
package/src/core/agents/a11y-analyzer-visual.md +158 -0
package/src/core/agents/a11y-consensus.md +248 -0
package/src/core/agents/ads-consensus.md +74 -0
package/src/core/agents/ads-generate.md +145 -0
package/src/core/agents/ads-performance-tracker.md +197 -0
package/src/core/agents/api-quality-analyzer-conventions.md +148 -0
package/src/core/agents/api-quality-analyzer-docs.md +176 -0
package/src/core/agents/api-quality-analyzer-errors.md +183 -0
package/src/core/agents/api-quality-analyzer-pagination.md +171 -0
package/src/core/agents/api-quality-analyzer-versioning.md +143 -0
package/src/core/agents/api-quality-consensus.md +214 -0
package/src/core/agents/arch-analyzer-circular.md +148 -0
package/src/core/agents/arch-analyzer-complexity.md +171 -0
package/src/core/agents/arch-analyzer-coupling.md +146 -0
package/src/core/agents/arch-analyzer-layering.md +151 -0
package/src/core/agents/arch-analyzer-patterns.md +162 -0
package/src/core/agents/arch-consensus.md +227 -0
package/src/core/commands/adr.md +1 -0
package/src/core/commands/ads/generate.md +238 -0
package/src/core/commands/ads/health.md +327 -0
package/src/core/commands/ads/test-plan.md +317 -0
package/src/core/commands/ads/track.md +288 -0
package/src/core/commands/ads.md +28 -16
package/src/core/commands/assign.md +1 -0
package/src/core/commands/audit.md +43 -6
package/src/core/commands/babysit.md +90 -6
package/src/core/commands/baseline.md +1 -0
package/src/core/commands/blockers.md +1 -0
package/src/core/commands/board.md +1 -0
package/src/core/commands/changelog.md +1 -0
package/src/core/commands/choose.md +1 -0
package/src/core/commands/ci.md +1 -0
package/src/core/commands/code/accessibility.md +347 -0
package/src/core/commands/code/api.md +297 -0
package/src/core/commands/code/architecture.md +297 -0
package/src/core/commands/code/completeness.md +43 -6
package/src/core/commands/code/legal.md +43 -6
package/src/core/commands/code/logic.md +43 -6
package/src/core/commands/code/performance.md +43 -6
package/src/core/commands/code/security.md +43 -6
package/src/core/commands/code/test.md +43 -6
package/src/core/commands/configure.md +1 -0
package/src/core/commands/council.md +1 -0
package/src/core/commands/deploy.md +1 -0
package/src/core/commands/diagnose.md +1 -0
package/src/core/commands/docs.md +1 -0
package/src/core/commands/epic/edit.md +213 -0
package/src/core/commands/epic.md +1 -0
package/src/core/commands/export.md +238 -0
package/src/core/commands/help.md +16 -1
package/src/core/commands/ideate/discover.md +7 -3
package/src/core/commands/ideate/features.md +65 -4
package/src/core/commands/ideate/new.md +158 -124
package/src/core/commands/impact.md +1 -0
package/src/core/commands/learn/explain.md +118 -0
package/src/core/commands/learn/glossary.md +135 -0
package/src/core/commands/learn/patterns.md +138 -0
package/src/core/commands/learn/tour.md +126 -0
package/src/core/commands/migrate/codemods.md +151 -0
package/src/core/commands/migrate/plan.md +131 -0
package/src/core/commands/migrate/scan.md +114 -0
package/src/core/commands/migrate/validate.md +119 -0
package/src/core/commands/multi-expert.md +1 -0
package/src/core/commands/pr.md +1 -0
package/src/core/commands/review.md +1 -0
package/src/core/commands/sprint.md +1 -0
package/src/core/commands/status/undo.md +191 -0
package/src/core/commands/status.md +1 -0
package/src/core/commands/story/edit.md +204 -0
package/src/core/commands/story/view.md +29 -7
package/src/core/commands/story-validate.md +1 -0
package/src/core/commands/story.md +1 -0
package/src/core/commands/tdd.md +1 -0
package/src/core/commands/team/start.md +10 -6
package/src/core/commands/tests.md +1 -0
package/src/core/commands/verify.md +27 -1
package/src/core/commands/workflow.md +2 -0
package/src/core/teams/backend.json +41 -0
package/src/core/teams/frontend.json +41 -0
package/src/core/teams/qa.json +41 -0
package/src/core/teams/solo.json +35 -0
package/src/core/templates/agileflow-metadata.json +5 -0
package/tools/cli/commands/setup.js +85 -3
package/tools/cli/commands/update.js +42 -0
package/tools/cli/installers/ide/claude-code.js +68 -0

package/src/core/commands/ads/track.md ADDED Viewed

@@ -0,0 +1,288 @@
+---
+description: Ads performance tracker — ingest performance CSVs, establish baselines, detect winners and anomalies, output KPI dashboard with trend analysis
+argument-hint: "<performance-data> [PERIOD=7d] [BASELINE=auto] [FORMAT=dashboard]"
+compact_context:
+  priority: medium
+  preserve_rules:
+    - "ACTIVE COMMAND: /agileflow:ads:track - Performance tracking and winner detection"
+    - "Ingest CSV/pasted performance data, establish baselines, detect anomalies"
+    - "Winner detection: statistical significance, cost efficiency, trend direction"
+    - "Delegate to ads-performance-tracker agent for analysis"
+    - "State persisted in docs/08-project/ads-tracking/"
+  state_fields:
+    - period
+    - baseline
+    - campaigns_tracked
+    - winners
+    - anomalies
+---
+# /agileflow:ads:track
+Ingest ad performance data (CSVs or pasted), establish baselines, detect winners and anomalies, and output a KPI dashboard with trend analysis and actionable recommendations.
+---
+## Quick Reference
+```
+/agileflow:ads:track <performance-data>                                # Analyze performance data
+/agileflow:ads:track <csv-file> PERIOD=30d                             # 30-day trend analysis
+/agileflow:ads:track <data> BASELINE=last-report                       # Compare against last saved baseline
+/agileflow:ads:track compare <old-csv> <new-csv>                       # Period-over-period comparison
+```
+---
+## Arguments
+| Argument | Values | Default | Description |
+|----------|--------|---------|-------------|
+| performance-data | CSV, pasted text, or file path | Required | Performance metrics to analyze |
+| PERIOD | 1d, 7d, 14d, 30d, 90d | 7d | Analysis period |
+| BASELINE | auto, last-report, or specific date | auto | Baseline for comparison |
+| FORMAT | dashboard, csv, both | dashboard | Output format |
+---
+## Data Formats Accepted
+### CSV Export (preferred)
+```csv
+Campaign,Ad Set,Ad,Impressions,Clicks,CTR,CPC,Spend,Conversions,CVR,CPA,ROAS,Date
+Brand - Search,Brand Terms,Ad 1,15234,1843,12.1%,$0.45,$829,234,12.7%,$3.54,8.2,2026-02-28
+```
+### Pasted Table
+```
+Campaign         | Spend  | Clicks | CPA    | ROAS
+Brand Search     | $829   | 1,843  | $3.54  | 8.2x
+Non-Brand Search | $2,341 | 987    | $15.20 | 2.1x
+Meta Prospecting | $1,560 | 2,104  | $28.40 | 1.4x
+```
+### Platform-Specific Exports
+- Google Ads: Campaign/Ad Group/Keyword reports
+- Meta Ads Manager: Campaign/Ad Set/Ad performance export
+- LinkedIn: Campaign Manager CSV export
+- TikTok: Business Center export
+---
+## Analysis Framework
+### STEP 1: Parse & Normalize Data
+Delegate to the `ads-performance-tracker` agent:
+```xml
+<invoke name="Agent">
+<parameter name="description">Analyze ad performance data</parameter>
+<parameter name="prompt">TASK: Analyze ad performance data and generate KPI dashboard.
+PERFORMANCE DATA:
+{data}
+PERIOD: {period}
+BASELINE: {baseline}
+Follow the full analysis framework in your instructions.
+OUTPUT: Complete KPI dashboard with winner detection, anomaly alerts, and recommendations.</parameter>
+<parameter name="subagent_type">ads-performance-tracker</parameter>
+</invoke>
+```
+### STEP 2: Establish Baselines
+If this is the first analysis or BASELINE=auto:
+- Calculate median and mean for each metric across all campaigns
+- Set thresholds at 1 standard deviation from mean
+- Save baseline to `docs/08-project/ads-tracking/baseline-{YYYYMMDD}.json`
+If BASELINE=last-report:
+- Load the most recent baseline from `docs/08-project/ads-tracking/`
+- Compare current metrics against saved baseline
+### STEP 3: Winner Detection
+Apply statistical winner detection:
+| Metric | Winner Threshold | Confidence Requirement |
+|--------|-----------------|----------------------|
+| CPC | < 0.7x median CPC | 100+ clicks |
+| CTR | > 1.5x median CTR | 1000+ impressions |
+| CVR | > 1.5x median CVR | 50+ clicks |
+| CPA | < 0.7x median CPA | 20+ conversions |
+| ROAS | > 1.5x median ROAS | $500+ spend |
+**Winner Classification:**
+| Class | Criteria | Action |
+|-------|----------|--------|
+| **Strong Winner** | Beats threshold on 3+ metrics | Scale 20%/week |
+| **Emerging Winner** | Beats threshold on 1-2 metrics | Continue monitoring |
+| **Stable Performer** | Within 1 SD of median on all metrics | Maintain |
+| **Underperformer** | Worse than median on 2+ metrics (higher CPC/CPA or lower CTR/CVR/ROAS) | Optimize or pause |
+| **Kill** | CPA > 3x target OR ROAS < 0.5x target | Pause immediately |
+### STEP 4: Anomaly Detection
+Flag anomalies when:
+- **Spend spike**: Daily spend > 2x average (possible budget cap issue)
+- **CTR drop**: CTR drops > 30% day-over-day (ad fatigue, audience saturation)
+- **CPC surge**: CPC increases > 50% week-over-week (competition, quality score)
+- **Conversion drop**: Conversions drop > 40% with stable traffic (tracking break, landing page issue)
+- **ROAS collapse**: ROAS drops below 1:1 (unprofitable, needs immediate action)
+### STEP 5: Trend Analysis
+For each campaign, calculate:
+- **7-day rolling average** for CPC, CTR, CVR, CPA
+- **Trend direction**: Improving, Stable, Declining
+- **Velocity**: Rate of change (slow, moderate, rapid)
+- **Projected trajectory**: If trend continues, estimated metrics in 7/14/30 days
+---
+## Output Format
+```markdown
+# Ads Performance Dashboard
+**Generated**: {YYYY-MM-DD}
+**Period**: {start_date} to {end_date} ({N} days)
+**Platforms**: {platforms}
+**Total Spend**: ${total_spend}
+**Baseline**: {baseline_source}
+---
+## Executive Summary
+| Metric | Current | Baseline | Change | Trend |
+|--------|---------|----------|--------|-------|
+| Total Spend | ${current} | ${baseline} | {+/-}% | {→/↑/↓} |
+| Avg CPC | ${current} | ${baseline} | {+/-}% | {→/↑/↓} |
+| Avg CTR | {current}% | {baseline}% | {+/-}% | {→/↑/↓} |
+| Total Conversions | {current} | {baseline} | {+/-}% | {→/↑/↓} |
+| Avg CPA | ${current} | ${baseline} | {+/-}% | {→/↑/↓} |
+| Blended ROAS | {current}x | {baseline}x | {+/-}% | {→/↑/↓} |
+---
+## Winner Detection
+### Strong Winners (scale these)
+| Campaign | CPC | CTR | CVR | CPA | ROAS | Action |
+|----------|-----|-----|-----|-----|------|--------|
+| {name} | ${cpc} ✅ | {ctr}% ✅ | {cvr}% ✅ | ${cpa} ✅ | {roas}x | Scale 20%/wk |
+### Emerging Winners (monitor closely)
+| Campaign | CPC | CTR | CVR | CPA | ROAS | Action |
+|----------|-----|-----|-----|-----|------|--------|
+| {name} | ${cpc} | {ctr}% ✅ | {cvr}% | ${cpa} | {roas}x | Continue 7d |
+### Kill List (pause immediately)
+| Campaign | Issue | CPA vs Target | Spend Wasted | Action |
+|----------|-------|--------------|-------------|--------|
+| {name} | CPA 3.2x target | ${cpa} vs ${target} | ${wasted} | **PAUSE NOW** |
+---
+## Anomaly Alerts
+| Alert | Campaign | Metric | Expected | Actual | Severity |
+|-------|----------|--------|----------|--------|----------|
+| ⚠️ | {name} | CPC | ${expected} | ${actual} | HIGH |
+| 🔴 | {name} | Conversions | {expected} | {actual} | CRITICAL |
+---
+## Campaign Performance
+### {Campaign Name}
+| Metric | Value | vs Baseline | Trend (7d) |
+|--------|-------|-------------|------------|
+| Spend | ${spend} | {+/-}% | {→/↑/↓} |
+| Impressions | {impr} | {+/-}% | {→/↑/↓} |
+| Clicks | {clicks} | {+/-}% | {→/↑/↓} |
+| CTR | {ctr}% | {+/-}% | {→/↑/↓} |
+| CPC | ${cpc} | {+/-}% | {→/↑/↓} |
+| Conversions | {conv} | {+/-}% | {→/↑/↓} |
+| CVR | {cvr}% | {+/-}% | {→/↑/↓} |
+| CPA | ${cpa} | {+/-}% | {→/↑/↓} |
+| ROAS | {roas}x | {+/-}% | {→/↑/↓} |
+**Classification**: {Strong Winner / Emerging / Stable / Underperformer / Kill}
+---
+## Budget Reallocation Recommendation
+| Campaign | Current Budget | Recommended | Change | Reason |
+|----------|---------------|-------------|--------|--------|
+| {winner} | ${current} | ${recommended} | +20% | Strong winner, scale |
+| {loser} | ${current} | ${recommended} | -100% | 3x kill rule |
+**Estimated Impact**: Reallocating ${amount} from underperformers to winners = estimated {X}% CPA reduction
+---
+## Recommendations
+### Immediate (this session)
+1. **Pause {campaign}** — CPA ${X} exceeds 3x target (${target})
+2. **Scale {campaign}** — Strong winner, increase budget 20%
+### This Week
+3. **Refresh creative for {campaign}** — CTR declining 15% WoW (ad fatigue)
+4. **Check tracking for {campaign}** — Conversions dropped 40% with stable clicks
+### This Month
+5. **Test new angles** — Run /agileflow:ads:generate + /agileflow:ads:test-plan
+6. **Platform diversification** — {platform} ROAS declining, test {other_platform}
+```
+Save dashboard to `docs/08-project/ads-tracking/dashboard-{YYYYMMDD}.md`.
+Save baseline to `docs/08-project/ads-tracking/baseline-{YYYYMMDD}.json`.
+---
+## Present Results
+```xml
+<invoke name="AskUserQuestion">
+<parameter name="questions">[{
+  "question": "Performance dashboard generated. {winners} winners, {kills} to kill, {anomalies} anomalies. Total spend: ${spend}, blended ROAS: {roas}x.",
+  "header": "Next steps",
+  "multiSelect": false,
+  "options": [
+    {"label": "Pause kill-list campaigns now (Recommended)", "description": "Stop wasting ${wasted_amount}/mo on {kills} underperforming campaigns"},
+    {"label": "Generate replacement ad copy", "description": "Run /agileflow:ads:generate for fatigued campaigns"},
+    {"label": "Create test plan for winners", "description": "Run /agileflow:ads:test-plan to scale winning angles"},
+    {"label": "Run full ads audit", "description": "Run /agileflow:ads:audit for comprehensive 190-check analysis"}
+  ]
+}]</parameter>
+</invoke>
+```
+---
+<!-- COMPACT_SUMMARY_START -->
+## Compact Summary
+**Command**: `/agileflow:ads:track` - Performance tracking with winner detection
+**Input**: Performance CSV/data from ad platforms
+**Analysis**: Baselines, winner detection (5 classes), anomaly alerts, trend analysis, budget reallocation
+**Key Rules**: 3x Kill Rule for CPA, 20% max scaling per week, 100+ clicks for CPC confidence
+**Output**: KPI dashboard + baseline JSON + recommendations
+**Usage**: `/agileflow:ads:track <data> [PERIOD=7d] [BASELINE=auto] [FORMAT=dashboard]`
+**Files**: `docs/08-project/ads-tracking/dashboard-{YYYYMMDD}.md`, `baseline-{YYYYMMDD}.json`
+<!-- COMPACT_SUMMARY_END -->

package/src/core/commands/ads.md CHANGED Viewed

@@ -22,6 +22,10 @@ Paid advertising audit & planning toolkit for multi-platform account optimizatio
 ```
 /agileflow:ads:audit <account-data>                    # Full multi-platform audit (6 parallel analyzers)
+/agileflow:ads:health <data+url>                        # Unified marketing health scorecard (ads+SEO+landing)
+/agileflow:ads:generate <product-description>           # Bulk ad copy generation (40+ variants)
+/agileflow:ads:test-plan                                # CPC-first test planning with decision criteria
+/agileflow:ads:track <performance-csv>                  # Performance tracking with winner detection
 /agileflow:ads:plan                                     # Campaign planning with industry templates
 /agileflow:ads:google <account-data>                    # Google Ads deep-dive (74 checks)
 /agileflow:ads:meta <account-data>                      # Meta/Facebook audit (46 checks)
@@ -42,6 +46,10 @@ Paid advertising audit & planning toolkit for multi-platform account optimizatio
 | Command | Purpose | When to Use |
 |---------|---------|-------------|
 | **audit** | Full multi-platform audit with Ads Health Score | Starting point for any account |
+| **health** | Unified marketing scorecard (ads+SEO+landing+tracking) | Executive overview of full marketing funnel |
+| **generate** | Bulk ad copy generation (40+ variants) | Creating new ad creative at scale |
+| **test-plan** | CPC-first test planning with decision criteria | Structuring A/B tests with winner/kill rules |
+| **track** | Performance tracking with winner detection | Ongoing campaign monitoring and optimization |
 | **plan** | Campaign planning with industry templates | New campaigns or restructuring |
 | **google** | Google Ads deep-dive (74 checks) | Google-specific optimization |
 | **meta** | Meta/Facebook audit (46 checks) | Meta-specific optimization |
@@ -80,18 +88,22 @@ For best results, include:
 If the user provides data without a sub-command, determine intent:
 1. **"audit my ads"** -> `/agileflow:ads:audit`
-2. **"plan a campaign"** -> `/agileflow:ads:plan`
-3. **"check my Google Ads"** -> `/agileflow:ads:google`
-4. **"Meta" / "Facebook ads"** -> `/agileflow:ads:meta`
-5. **"creative review"** -> `/agileflow:ads:creative`
-6. **"budget" / "spend" / "bidding"** -> `/agileflow:ads:budget`
-7. **"landing page"** -> `/agileflow:ads:landing`
-8. **"competitor" / "competitive"** -> `/agileflow:ads:competitor`
-9. **"LinkedIn"** -> `/agileflow:ads:linkedin`
-10. **"TikTok"** -> `/agileflow:ads:tiktok`
-11. **"Microsoft" / "Bing"** -> `/agileflow:ads:microsoft`
-12. **"YouTube" / "video ads"** -> `/agileflow:ads:youtube`
-13. **Unclear** -> Show the quick reference and ask which analysis they want
+2. **"health check" / "marketing score" / "full scorecard"** -> `/agileflow:ads:health`
+3. **"generate ad copy" / "write ads" / "bulk ads" / "ad variants"** -> `/agileflow:ads:generate`
+4. **"test plan" / "A/B test" / "split test" / "CPC test"** -> `/agileflow:ads:test-plan`
+5. **"track performance" / "dashboard" / "winners" / "KPIs"** -> `/agileflow:ads:track`
+6. **"plan a campaign"** -> `/agileflow:ads:plan`
+7. **"check my Google Ads"** -> `/agileflow:ads:google`
+8. **"Meta" / "Facebook ads"** -> `/agileflow:ads:meta`
+9. **"creative review"** -> `/agileflow:ads:creative`
+10. **"budget" / "spend" / "bidding"** -> `/agileflow:ads:budget`
+11. **"landing page"** -> `/agileflow:ads:landing`
+12. **"competitor" / "competitive"** -> `/agileflow:ads:competitor`
+13. **"LinkedIn"** -> `/agileflow:ads:linkedin`
+14. **"TikTok"** -> `/agileflow:ads:tiktok`
+15. **"Microsoft" / "Bing"** -> `/agileflow:ads:microsoft`
+16. **"YouTube" / "video ads"** -> `/agileflow:ads:youtube`
+17. **Unclear** -> Show the quick reference and ask which analysis they want
 ---
@@ -107,9 +119,9 @@ Show the quick reference table above and ask:
   "multiSelect": false,
   "options": [
     {"label": "Full multi-platform audit (Recommended)", "description": "Comprehensive 6-analyzer audit with Ads Health Score 0-100"},
-    {"label": "Campaign planning", "description": "Industry-specific campaign templates with budget allocation"},
-    {"label": "Platform-specific audit", "description": "Deep-dive into Google, Meta, LinkedIn, TikTok, Microsoft, or YouTube"},
-    {"label": "Budget & bidding strategy", "description": "Optimize spend allocation and bidding across platforms"}
+    {"label": "Marketing health scorecard", "description": "Unified score across ads + SEO + landing pages + tracking"},
+    {"label": "Generate ad copy (40+ variants)", "description": "Bulk ad copy from product description with ICP angles + platform CSV"},
+    {"label": "Create A/B test plan", "description": "CPC-first testing with budget allocation and winner/kill rules"}
   ]
 }]</parameter>
 </invoke>
@@ -122,7 +134,7 @@ Show the quick reference table above and ask:
 **Command**: `/agileflow:ads` - Paid advertising audit & planning router
-**Sub-commands**: audit, plan, google, meta, creative, budget, landing, competitor, linkedin, tiktok, microsoft, youtube
+**Sub-commands**: audit, health, generate, test-plan, track, plan, google, meta, creative, budget, landing, competitor, linkedin, tiktok, microsoft, youtube
 **Quick start**: `/agileflow:ads:audit <account-data>` for full analysis
 <!-- COMPACT_SUMMARY_END -->

package/src/core/commands/assign.md CHANGED Viewed

@@ -1,5 +1,6 @@
 ---
 description: Assign or reassign a story to an owner
+phase: pre-story
 argument-hint: "STORY=<US-ID> NEW_OWNER=<id> [NEW_STATUS=<status>] [NOTE=<text>]"
 compact_context:
   priority: high

package/src/core/commands/audit.md CHANGED Viewed

@@ -1,5 +1,6 @@
 ---
 description: Audit story completion - tests + acceptance criteria verification (GSD pattern)
+phase: post-impl
 argument-hint: "STORY=<US-ID>"
 compact_context:
   priority: high
@@ -162,25 +163,61 @@ Parse results:
 ### Step 3: Verify Acceptance Criteria
-Display each AC from status.json and ask user to verify:
+**3a. Run AC-to-Test Matcher (automated pre-check)**
+Before asking the user to verify AC manually, run the automated AC-to-test matcher:
+```bash
+node -e "
+const { matchACToTests } = require('./.agileflow/scripts/lib/ac-test-matcher');
+const result = matchACToTests('{{STORY_ID}}');
+console.log(JSON.stringify(result, null, 2));
+"
+```
+This returns matched/unmatched AC with confidence levels. Use the results to pre-populate the checklist.
+**3b. Display AC checklist with auto-matched items**
+Show each AC with auto-match status. High-confidence matches are pre-checked:
 ```xml
 <invoke name="AskUserQuestion">
 <parameter name="questions">[{
-  "question": "Verify each acceptance criterion is met for {{STORY_ID}}:",
+  "question": "Verify acceptance criteria for {{STORY_ID}} ({{matched}}/{{total}} auto-matched to tests):",
   "header": "AC Check",
   "multiSelect": true,
   "options": [
-    {"label": "{{AC_1}}", "description": "Mark if complete"},
-    {"label": "{{AC_2}}", "description": "Mark if complete"},
-    {"label": "{{AC_3}}", "description": "Mark if complete"}
+    {"label": "AC1: {{AC_1}} [auto-verified]", "description": "Matched to {{test_file}} (high confidence)"},
+    {"label": "AC2: {{AC_2}} [likely-covered]", "description": "Matched to {{test_file}} (medium confidence)"},
+    {"label": "AC3: {{AC_3}} [needs manual check]", "description": "No matching tests found - verify manually"}
   ]
 }]</parameter>
 </invoke>
 ```
+Auto-matched AC (high/medium confidence) should be pre-selected. Unmatched AC require manual verification.
+**3c. Write ac_status to status.json**
+After verification, write structured `ac_status` to the story in status.json:
+```json
+{
+  "ac_status": {
+    "0": "auto-verified",
+    "1": "auto-verified",
+    "2": "verified",
+    "3": "unverified"
+  },
+  "ac_coverage": 0.75
+}
+```
+Values: `auto-verified` (test match), `verified` (manual confirm), `likely-covered` (medium confidence), `unverified` (not confirmed)
 Calculate verification rate:
-- Count selected (verified) vs total AC
+- Count verified + auto-verified vs total AC
 - 100% = All verified
 - <100% = Partial

package/src/core/commands/babysit.md CHANGED Viewed

@@ -1,6 +1,7 @@
 ---
 description: Interactive mentor for end-to-end feature implementation
-argument-hint: "[EPIC=<EP-ID>] [MODE=loop|once] [VISUAL=true|false] [COVERAGE=<percent>] [MAX=<iterations>] [STRICT=true|false] [TDD=true|false]"
+phase: implementation
+argument-hint: "[EPIC=<EP-ID>] [MODE=loop|once] [VISUAL=true|false] [COVERAGE=<percent>] [MAX=<iterations>] [STRICT=true|false] [TDD=true|false] [VERIFY=suggest|recommend|require|block] [CI_ROUNDS=<N>]"
 compact_context:
   priority: critical
   preserve_rules:
@@ -18,6 +19,8 @@ compact_context:
     - "OBTAIN-CONTEXT: NEVER pipe obtain-context.js through head/tail/truncation - run it bare, it has built-in smart output limits"
     - "STRICT MODE: When STRICT=true, enforce gates - hide commit option until tests pass, auto-trigger code review for 5+ files, remove skip options"
     - "TDD MODE: When TDD=true, start stories in RED phase via /agileflow:tdd. Follow RED→GREEN→REFACTOR phases."
+    - "VERIFY MODE: suggest=current behavior, recommend=show AC summary + (Recommended) framing, require=auto-run verify + AC checklist + gate commit, block=require + browser QA for UI stories. STRICT=true implies VERIFY=require."
+    - "CI FEEDBACK LOOP: When tests fail, auto-retry up to CI_ROUNDS (default 3) before escalating. Uses executeCIFeedbackLoop() from quality-gates.js."
   state_fields:
     - current_story
     - current_epic
@@ -25,6 +28,8 @@ compact_context:
     - claimed_story_id
     - strict_mode
     - tdd_mode
+    - verify_mode
+    - ci_rounds
 ---
 # /agileflow-babysit
@@ -56,13 +61,17 @@ All parameters are optional. Most are auto-detected by the Contextual Feature Ro
 | `COVERAGE` | auto | `80` | Test coverage threshold (%). Set `0` to disable |
 | `STRICT` | `false` | `true` | Enforce workflow gates (tests required before commit, code review for 5+ files) |
 | `TDD` | `false` | `true` | Enable TDD mode (RED→GREEN→REFACTOR phases) for each story |
+| `VERIFY` | `recommend` | `require` | AC verification enforcement level (see VERIFY MODE below) |
+| `CI_ROUNDS` | `3` | `5` | Max auto-retry rounds when tests fail before escalating to human |
-**Auto-detection**: When `EPIC` is specified with 3+ ready stories, `MODE=loop` is auto-enabled. `VISUAL` auto-enables for UI-tagged stories. `COVERAGE` auto-enables when a coverage baseline exists.
+**Auto-detection**: When `EPIC` is specified with 3+ ready stories, `MODE=loop` is auto-enabled. `VISUAL` auto-enables for UI-tagged stories. `COVERAGE` auto-enables when a coverage baseline exists. `STRICT=true` implies `VERIFY=require` unless explicitly overridden.
 ```
 /agileflow:babysit EPIC=EP-0042                    # Auto-detect everything
 /agileflow:babysit EPIC=EP-0042 MODE=once          # Single story only
 /agileflow:babysit STRICT=true TDD=true            # Full discipline: TDD + strict gates
+/agileflow:babysit VERIFY=require                  # Enforce AC verification before commit
+/agileflow:babysit STRICT=true VERIFY=suggest      # Strict gates but relaxed AC verification
 ```
 ---
@@ -114,6 +123,80 @@ When both enabled: stories start in TDD RED phase, phase gates enforced (RED nee
 ---
+## VERIFY MODE (AC Verification Enforcement)
+Graduated verification of acceptance criteria before story completion.
+| Level | Behavior | Use Case |
+|-------|----------|----------|
+| `suggest` | Current behavior - AC verification available but not prompted | Exploratory work |
+| `recommend` (default) | Show AC summary after tests pass, (Recommended) framing for verify | Normal development |
+| `require` | Auto-run ac-test-matcher, show AC checklist, gate commit on AC verification | Team/production |
+| `block` | All of `require` + browser QA for UI stories | Critical/regulated |
+`STRICT=true` implies `VERIFY=require` unless explicitly overridden.
+### How It Works
+1. After tests pass, run `ac-test-matcher.js` to find test-covered AC
+2. Auto-verified AC (high confidence match) are pre-checked
+3. Unmatched AC require manual confirmation via AskUserQuestion
+4. At `require`/`block` level, commit option hidden until all AC confirmed
+### AC Summary in AskUserQuestion
+After tests pass with `VERIFY=recommend` (the "Commit" option remains available at this level):
+```json
+[
+  {"label": "Verify AC for US-0042 (Recommended)", "description": "3/5 AC auto-matched to tests, 2 need manual check"},
+  {"label": "Commit: 'feat: add session tracking'", "description": "Tests pass, skip AC verification"},
+  {"label": "🔍 Run logic audit", "description": "5 analyzers catch edge cases tests miss"}
+]
+```
+At `require` or `block` level, the "Commit" option is hidden until all AC are verified:
+```json
+[
+  {"label": "Verify AC for US-0042 (Required)", "description": "3/5 AC auto-matched, 2 need manual confirmation"},
+  {"label": "🔍 Run logic audit", "description": "5 analyzers catch edge cases tests miss"}
+]
+```
+Track verification state:
+```
+⬜ tests_passed    → Run /agileflow:verify
+⬜ ac_verified     → Run ac-test-matcher + manual check
+⬜ review_done     → Auto-triggered at 5+ files
+⬜ logic_audit     → Optional (advisory)
+```
+---
+## CI FEEDBACK LOOP (`CI_ROUNDS=<N>`)
+Auto-retry when tests fail, inspired by Stripe's Blueprint Engine pattern. Instead of immediately escalating to the human when tests fail, the agent gets structured CI feedback and retries up to N rounds.
+| Round | What Happens |
+|-------|-------------|
+| 1..N-1 | Tests fail → agent receives failure output → fixes and retries |
+| N | Tests fail → escalate to human with full failure context |
+| Any | Tests pass → proceed to next workflow step |
+**Configuration**: Set `ci_feedback_loops.max_rounds` in `docs/00-meta/agileflow-metadata.json` (default: 3). Override per-session with `CI_ROUNDS=N`.
+**Integration with quality-gates.js**: Uses `executeCIFeedbackLoop()` which wraps `executeGates()` with round tracking and structured agent feedback.
+**When active**: After implementation, instead of asking the user about test failures, automatically re-attempt fixes. After exhausting rounds, present:
+```json
+[
+  {"label": "Review CI failures manually (Recommended)", "description": "3/3 auto-fix rounds exhausted, 2 tests still failing"},
+  {"label": "Run /agileflow:research:ask with failure context", "description": "Get external guidance on persistent failures"},
+  {"label": "Skip failing tests and commit", "description": "Tests may be flaky or unrelated"}
+]
+```
+---
 ## SCALE-ADAPTIVE BEHAVIOR
 | Scale | Planning Depth | Expert Usage | Workflow |
@@ -170,7 +253,8 @@ User parameters override smart detection (`MODE=once` overrides loop, `VISUAL=fa
 | After context | Most impactful ready story |
 | After plan approval | "Start implementing now" |
 | After code written | "Run tests (Recommended)" + logic audit option |
-| After tests pass | "🔍 Run logic audit (Recommended)" or "Commit" |
+| After tests pass | "Verify AC (Recommended)" if VERIFY>=recommend, else "🔍 Run logic audit (Recommended)" or "Commit" |
+| After AC verified | "🔍 Run logic audit (Recommended)" or "Commit" |
 | After logic audit | "Commit: '[type]: [summary]' (Recommended)" |
 | After error | "Try [specific alternative]" |
@@ -263,9 +347,9 @@ Don't wait for smart-detect. Auto-trigger based on these rules:
 10. Verify tests pass
 **Phase 4: Review & Completion**
-11. Offer via AskUserQuestion: tests, logic audit, code review (5+ files), docs sync (API changes), multi-expert (10+ files), ADR (if arch decision)
-12. STRICT gate check: hide commit until gates pass
-13. Update status.json, release story claim: `node .agileflow/scripts/lib/story-claiming.js release <id>`
+11. Offer via AskUserQuestion: tests, AC verification (VERIFY mode), logic audit, code review (5+ files), docs sync (API changes), multi-expert (10+ files), ADR (if arch decision)
+12. STRICT/VERIFY gate check: hide commit until gates pass (tests + AC at require/block level)
+13. Update status.json (including ac_status), release story claim: `node .agileflow/scripts/lib/story-claiming.js release <id>`
 ---

package/src/core/commands/baseline.md CHANGED Viewed

@@ -1,5 +1,6 @@
 ---
 description: Mark current state as verified baseline
+phase: planning
 argument-hint: "[<message>]"
 compact_context:
   priority: critical

package/src/core/commands/blockers.md CHANGED Viewed

@@ -1,5 +1,6 @@
 ---
 description: Track and resolve blockers with actionable suggestions
+phase: pre-story
 argument-hint: "[AGENT=<id>] [SHOW_RESOLVED=true|false] [DETAILED=true|false]"
 model: haiku
 compact_context:

package/src/core/commands/board.md CHANGED Viewed

@@ -1,5 +1,6 @@
 ---
 description: Display visual kanban board with WIP limits
+phase: pre-story
 argument-hint: "[EPIC=<EP-ID>] [OWNER=<id>] [FORMAT=ascii|markdown|html] [GROUP_BY=status|owner|epic]"
 model: haiku
 type: output-only  # Board display - read-only visualization, not an ongoing task

package/src/core/commands/changelog.md CHANGED Viewed

@@ -1,5 +1,6 @@
 ---
 description: Auto-generate changelog from commit history
+phase: post-impl
 argument-hint: "(no arguments)"
 compact_context:
   priority: high

package/src/core/commands/choose.md CHANGED Viewed

@@ -1,5 +1,6 @@
 ---
 description: AI-directed decision making with structured options
+phase: pre-story
 argument-hint: "<decision> [<context>]"
 compact_context:
   priority: normal

package/src/core/commands/ci.md CHANGED Viewed

@@ -1,5 +1,6 @@
 ---
 description: Bootstrap CI/CD workflow with testing and quality checks
+phase: implementation
 argument-hint: "(no arguments)"
 compact_context:
   priority: high