sequant 1.20.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/.claude-plugin/marketplace.json +2 -4
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/README.md +29 -9
  4. package/dist/bin/cli.js +25 -2
  5. package/dist/src/commands/doctor.js +42 -9
  6. package/dist/src/commands/init.d.ts +1 -0
  7. package/dist/src/commands/init.js +52 -0
  8. package/dist/src/commands/logs.d.ts +1 -0
  9. package/dist/src/commands/logs.js +18 -2
  10. package/dist/src/commands/run.d.ts +7 -0
  11. package/dist/src/commands/run.js +235 -68
  12. package/dist/src/commands/serve.d.ts +13 -0
  13. package/dist/src/commands/serve.js +131 -0
  14. package/dist/src/commands/stats.d.ts +1 -0
  15. package/dist/src/commands/stats.js +185 -26
  16. package/dist/src/commands/status.d.ts +2 -0
  17. package/dist/src/commands/status.js +99 -50
  18. package/dist/src/index.d.ts +2 -2
  19. package/dist/src/index.js +4 -1
  20. package/dist/src/lib/ac-parser.d.ts +2 -0
  21. package/dist/src/lib/ac-parser.js +12 -2
  22. package/dist/src/lib/assess-comment-parser.d.ts +137 -0
  23. package/dist/src/lib/assess-comment-parser.js +344 -0
  24. package/dist/src/lib/ci/config.d.ts +22 -0
  25. package/dist/src/lib/ci/config.js +134 -0
  26. package/dist/src/lib/ci/index.d.ts +12 -0
  27. package/dist/src/lib/ci/index.js +10 -0
  28. package/dist/src/lib/ci/inputs.d.ts +29 -0
  29. package/dist/src/lib/ci/inputs.js +103 -0
  30. package/dist/src/lib/ci/labels.d.ts +34 -0
  31. package/dist/src/lib/ci/labels.js +101 -0
  32. package/dist/src/lib/ci/outputs.d.ts +25 -0
  33. package/dist/src/lib/ci/outputs.js +84 -0
  34. package/dist/src/lib/ci/triggers.d.ts +9 -0
  35. package/dist/src/lib/ci/triggers.js +86 -0
  36. package/dist/src/lib/ci/types.d.ts +131 -0
  37. package/dist/src/lib/ci/types.js +47 -0
  38. package/dist/src/lib/mcp-config.d.ts +54 -0
  39. package/dist/src/lib/mcp-config.js +172 -0
  40. package/dist/src/lib/merge-check/index.js +6 -12
  41. package/dist/src/lib/merge-check/types.d.ts +20 -7
  42. package/dist/src/lib/merge-check/types.js +11 -0
  43. package/dist/src/lib/phase-signal.d.ts +3 -3
  44. package/dist/src/lib/phase-signal.js +5 -3
  45. package/dist/src/lib/settings.d.ts +52 -0
  46. package/dist/src/lib/settings.js +41 -0
  47. package/dist/src/lib/shutdown.d.ts +16 -5
  48. package/dist/src/lib/shutdown.js +32 -12
  49. package/dist/src/lib/solve-comment-parser.d.ts +9 -102
  50. package/dist/src/lib/solve-comment-parser.js +13 -248
  51. package/dist/src/lib/stacks.d.ts +8 -0
  52. package/dist/src/lib/stacks.js +34 -0
  53. package/dist/src/lib/system.js +3 -7
  54. package/dist/src/lib/test-tautology-detector.d.ts +10 -0
  55. package/dist/src/lib/test-tautology-detector.js +43 -4
  56. package/dist/src/lib/upstream/assessment.js +9 -59
  57. package/dist/src/lib/upstream/issues.js +12 -75
  58. package/dist/src/lib/version-check.d.ts +2 -2
  59. package/dist/src/lib/version-check.js +6 -3
  60. package/dist/src/lib/version.d.ts +4 -0
  61. package/dist/src/lib/version.js +25 -0
  62. package/dist/src/lib/workflow/batch-executor.d.ts +18 -86
  63. package/dist/src/lib/workflow/batch-executor.js +232 -55
  64. package/dist/src/lib/workflow/drivers/agent-driver.d.ts +56 -0
  65. package/dist/src/lib/workflow/drivers/agent-driver.js +8 -0
  66. package/dist/src/lib/workflow/drivers/aider.d.ts +18 -0
  67. package/dist/src/lib/workflow/drivers/aider.js +160 -0
  68. package/dist/src/lib/workflow/drivers/claude-code.d.ts +17 -0
  69. package/dist/src/lib/workflow/drivers/claude-code.js +165 -0
  70. package/dist/src/lib/workflow/drivers/index.d.ts +20 -0
  71. package/dist/src/lib/workflow/drivers/index.js +27 -0
  72. package/dist/src/lib/workflow/error-classifier.d.ts +16 -0
  73. package/dist/src/lib/workflow/error-classifier.js +90 -0
  74. package/dist/src/lib/workflow/log-writer.d.ts +6 -3
  75. package/dist/src/lib/workflow/log-writer.js +57 -27
  76. package/dist/src/lib/workflow/metrics-schema.d.ts +9 -9
  77. package/dist/src/lib/workflow/phase-detection.d.ts +23 -0
  78. package/dist/src/lib/workflow/phase-detection.js +45 -29
  79. package/dist/src/lib/workflow/phase-executor.d.ts +42 -3
  80. package/dist/src/lib/workflow/phase-executor.js +345 -220
  81. package/dist/src/lib/workflow/phase-mapper.d.ts +1 -1
  82. package/dist/src/lib/workflow/phase-mapper.js +7 -7
  83. package/dist/src/lib/workflow/platforms/github.d.ts +157 -0
  84. package/dist/src/lib/workflow/platforms/github.js +466 -0
  85. package/dist/src/lib/workflow/platforms/index.d.ts +17 -0
  86. package/dist/src/lib/workflow/platforms/index.js +25 -0
  87. package/dist/src/lib/workflow/platforms/platform-provider.d.ts +67 -0
  88. package/dist/src/lib/workflow/platforms/platform-provider.js +8 -0
  89. package/dist/src/lib/workflow/pr-status.d.ts +2 -4
  90. package/dist/src/lib/workflow/pr-status.js +3 -16
  91. package/dist/src/lib/workflow/qa-cache.d.ts +58 -0
  92. package/dist/src/lib/workflow/qa-cache.js +88 -0
  93. package/dist/src/lib/workflow/reconcile.d.ts +69 -0
  94. package/dist/src/lib/workflow/reconcile.js +290 -0
  95. package/dist/src/lib/workflow/ring-buffer.d.ts +17 -0
  96. package/dist/src/lib/workflow/ring-buffer.js +37 -0
  97. package/dist/src/lib/workflow/run-log-schema.d.ts +115 -24
  98. package/dist/src/lib/workflow/run-log-schema.js +47 -12
  99. package/dist/src/lib/workflow/run-reflect.js +1 -1
  100. package/dist/src/lib/workflow/state-cleanup.js +21 -0
  101. package/dist/src/lib/workflow/state-manager.d.ts +34 -3
  102. package/dist/src/lib/workflow/state-manager.js +278 -126
  103. package/dist/src/lib/workflow/state-schema.d.ts +34 -30
  104. package/dist/src/lib/workflow/state-schema.js +35 -25
  105. package/dist/src/lib/workflow/state-utils.d.ts +3 -1
  106. package/dist/src/lib/workflow/state-utils.js +1 -0
  107. package/dist/src/lib/workflow/types.d.ts +208 -6
  108. package/dist/src/lib/workflow/types.js +20 -1
  109. package/dist/src/lib/workflow/worktree-discovery.d.ts +1 -1
  110. package/dist/src/lib/workflow/worktree-discovery.js +6 -14
  111. package/dist/src/lib/workflow/worktree-manager.js +33 -51
  112. package/dist/src/mcp/index.d.ts +4 -0
  113. package/dist/src/mcp/index.js +4 -0
  114. package/dist/src/mcp/resources.d.ts +7 -0
  115. package/dist/src/mcp/resources.js +111 -0
  116. package/dist/src/mcp/run-registry.d.ts +34 -0
  117. package/dist/src/mcp/run-registry.js +42 -0
  118. package/dist/src/mcp/server.d.ts +12 -0
  119. package/dist/src/mcp/server.js +50 -0
  120. package/dist/src/mcp/tools/logs.d.ts +7 -0
  121. package/dist/src/mcp/tools/logs.js +149 -0
  122. package/dist/src/mcp/tools/run.d.ts +121 -0
  123. package/dist/src/mcp/tools/run.js +591 -0
  124. package/dist/src/mcp/tools/status.d.ts +7 -0
  125. package/dist/src/mcp/tools/status.js +127 -0
  126. package/package.json +10 -1
  127. package/templates/hooks/post-tool.sh +19 -8
  128. package/templates/hooks/pre-tool.sh +36 -49
  129. package/templates/mcp.json +6 -0
  130. package/templates/skills/assess/SKILL.md +354 -352
  131. package/templates/skills/exec/SKILL.md +64 -1
  132. package/templates/skills/fullsolve/SKILL.md +35 -4
  133. package/templates/skills/qa/SKILL.md +486 -9
  134. package/templates/skills/qa/scripts/quality-checks.sh +1 -1
  135. package/templates/skills/setup/SKILL.md +386 -0
  136. package/templates/skills/solve/SKILL.md +38 -664
  137. package/templates/skills/spec/SKILL.md +90 -31
@@ -97,15 +97,22 @@ fi
97
97
 
98
98
  **Phase Marker Emission:**
99
99
 
100
- When posting the QA review comment to GitHub, append a phase marker at the end:
100
+ When posting the QA review comment to GitHub, append a phase marker at the end.
101
+
102
+ **IMPORTANT:** Always include the `commitSHA` field, set to the current HEAD SHA (the output of `git rev-parse HEAD`). This records the baseline commit so that future QA runs can be incremental.
103
+
104
+ ```bash
105
+ # Get current HEAD SHA for the phase marker
106
+ COMMIT_SHA=$(git rev-parse HEAD)
107
+ ```
101
108
 
102
109
  ```markdown
103
- <!-- SEQUANT_PHASE: {"phase":"qa","status":"completed","timestamp":"<ISO-8601>"} -->
110
+ <!-- SEQUANT_PHASE: {"phase":"qa","status":"completed","timestamp":"<ISO-8601>","commitSHA":"<HEAD-SHA>"} -->
104
111
  ```
105
112
 
106
113
  If QA determines AC_NOT_MET, emit:
107
114
  ```markdown
108
- <!-- SEQUANT_PHASE: {"phase":"qa","status":"failed","timestamp":"<ISO-8601>","error":"AC_NOT_MET"} -->
115
+ <!-- SEQUANT_PHASE: {"phase":"qa","status":"failed","timestamp":"<ISO-8601>","error":"AC_NOT_MET","commitSHA":"<HEAD-SHA>"} -->
109
116
  ```
110
117
 
111
118
  Include this marker in every `gh issue comment` that represents QA completion.
@@ -383,7 +390,7 @@ If no feature worktree exists (work was done directly on main):
383
390
  # 1. Check for worktree (indicates work may have started)
384
391
  worktree_path=$(git worktree list | grep -i "<issue-number>" | awk '{print $1}' | head -1 || true)
385
392
 
386
- # 2. Check for commits on feature branch (vs main)
393
+ # 2. Check for commits on feature branch (vs main) — include ALL file types
387
394
  commits_exist=$(git log --oneline main..HEAD 2>/dev/null | head -1)
388
395
 
389
396
  # 3. Check for uncommitted changes
@@ -391,8 +398,13 @@ uncommitted_changes=$(git status --porcelain | head -1)
391
398
 
392
399
  # 4. Check for open PR linked to this issue
393
400
  pr_exists=$(gh pr list --search "<issue-number>" --state open --json number -q '.[0].number' 2>/dev/null)
401
+
402
+ # 5. Check for ANY file changes (including .md, prompt-only changes)
403
+ any_diff=$(git diff --name-only main..HEAD 2>/dev/null | head -1 || true)
394
404
  ```
395
405
 
406
+ **IMPORTANT: Prompt-only and markdown-only changes ARE valid implementations.** Many issues (e.g., skill improvements, documentation features) are implemented entirely via `.md` file changes. The detection logic must count these as real implementation, not skip them.
407
+
396
408
  **Implementation Status Matrix:**
397
409
 
398
410
  | Worktree | Commits | Uncommitted | PR | Status | Action |
@@ -407,6 +419,20 @@ pr_exists=$(gh pr list --search "<issue-number>" --state open --json number -q '
407
419
  **Early Exit Condition:**
408
420
  - No commits on feature branch AND no uncommitted changes AND no open PR
409
421
 
422
+ **False Negative Prevention (CRITICAL):**
423
+
424
+ Root cause analysis (#448) found that 33% of multi-attempt QA failures were caused by QA reporting "NOT FOUND" when implementation existed. Common causes:
425
+
426
+ | Cause | Example | Fix |
427
+ |-------|---------|-----|
428
+ | Prompt-only changes | Skill SKILL.md modifications (#413) | Check `git diff --name-only` for ANY file, not just .ts/.tsx |
429
+ | Cross-repo work | Landing page issue tracked in main repo (#393) | Check exec progress comments for cross-repo indicators |
430
+ | Worktree mismatch | QA runs in wrong directory | Verify `pwd` matches expected worktree path |
431
+
432
+ **If `git diff --name-only main..HEAD` shows files but standard detection says "NOT FOUND":**
433
+ 1. The implementation exists — proceed with QA
434
+ 2. Adapt review approach to the file types changed (e.g., review .md changes for content quality rather than TypeScript compilation)
435
+
410
436
  **If early exit triggered:**
411
437
  1. **Skip** sub-agent spawning (nothing to check)
412
438
  2. **Skip** code review (no code to review)
@@ -446,6 +472,19 @@ No code changes found to review. The acceptance criteria cannot be evaluated wit
446
472
 
447
473
  **Important:** Do NOT spawn sub-agents when using early exit. This saves tokens and avoids confusing "no changes found" outputs from quality checkers.
448
474
 
475
+ **CRITICAL — Before early exit, double-check for false negatives:**
476
+ ```bash
477
+ # Final safety check: are there ANY file changes vs main?
478
+ any_changes=$(git diff --name-only main..HEAD 2>/dev/null | wc -l | xargs || echo "0")
479
+ if [[ "$any_changes" -gt 0 ]]; then
480
+ echo "WARNING: $any_changes files changed but detection said NOT FOUND"
481
+ echo "Changed files:"
482
+ git diff --name-only main..HEAD 2>/dev/null | head -20
483
+ echo "Proceeding with QA instead of early exit."
484
+ # DO NOT early exit — proceed with QA
485
+ fi
486
+ ```
487
+
449
488
  ---
450
489
 
451
490
  ### Phase 0b: Quality Plan Verification (CONDITIONAL)
@@ -496,7 +535,7 @@ quality_plan_exists=$(gh issue view <issue> --comments --json comments -q '.comm
496
535
  sort -u || true)
497
536
 
498
537
  # Count derived ACs
499
- derived_count=$(echo "$derived_acs" | grep -c "AC-" || echo "0")
538
+ derived_count=$(echo "$derived_acs" | grep -c "AC-" || true)
500
539
  echo "Found $derived_count derived ACs"
501
540
  ```
502
541
 
@@ -560,6 +599,134 @@ quality_plan_exists=$(gh issue view <issue> --comments --json comments -q '.comm
560
599
 
561
600
  ---
562
601
 
602
+ ### Phase 0c: Incremental Re-Run Detection (CONDITIONAL)
603
+
604
+ **When to apply:** On QA re-runs (when a prior QA phase marker exists in issue comments).
605
+
606
+ **Purpose:** Optimize QA re-runs by detecting what changed since the last QA run and skipping checks whose inputs haven't changed. This significantly reduces token usage and execution time on iterative QA cycles.
607
+
608
+ **Detection:**
609
+
610
+ ```bash
611
+ # Step 1: Check for prior QA run context in cache
612
+ prior_context=$(npx tsx scripts/qa/qa-cache-cli.ts get-run-context 2>/dev/null || true)
613
+
614
+ # Step 2: If no cache context found, fall through to full QA run
615
+ if [[ -z "$prior_context" ]] || echo "$prior_context" | grep -q "No QA run context"; then
616
+ echo "No prior QA context found — running full QA"
617
+ INCREMENTAL_MODE=false
618
+ else
619
+ LAST_QA_SHA=$(echo "$prior_context" | jq -r '.lastQACommitSHA')
620
+ LAST_QA_HASH=$(echo "$prior_context" | jq -r '.lastQADiffHash')
621
+
622
+ # Step 3: Validate the commit SHA still exists in git history
623
+ if ! git cat-file -t "$LAST_QA_SHA" &>/dev/null; then
624
+ echo "Warning: Last QA commit SHA ($LAST_QA_SHA) not found in history — running full QA"
625
+ INCREMENTAL_MODE=false
626
+ else
627
+ # Step 4: Get files changed since last QA
628
+ changed_files=$(npx tsx scripts/qa/qa-cache-cli.ts changed-since "$LAST_QA_SHA" 2>/dev/null || true)
629
+
630
+ if [[ "$changed_files" == "NO_CHANGES" ]]; then
631
+ echo "No changes since last QA — all checks can use cached results"
632
+ INCREMENTAL_MODE=true
633
+ NO_FILE_CHANGES=true
634
+ else
635
+ echo "Changes detected since last QA ($LAST_QA_SHA):"
636
+ echo "$changed_files" | head -20
637
+ INCREMENTAL_MODE=true
638
+ NO_FILE_CHANGES=false
639
+ fi
640
+ fi
641
+ fi
642
+ ```
643
+
644
+ **Skip Logic (when INCREMENTAL_MODE=true):**
645
+
646
+ | Check / Item | Skip Condition | Re-run Condition |
647
+ |-------------|----------------|------------------|
648
+ | Quality checks (type-safety, security, etc.) | Diff hash unchanged (handled by the existing diff-hash cache) | Hash mismatch -> re-run |
649
+ | Build verification | **Never skip** (always re-run) | Always — cheap and can regress |
650
+ | CI status | **Never skip** (always re-run) | Always — external state changes |
651
+ | AC items with prior status `met` | Skip if NO_FILE_CHANGES=true | Any file changes since last QA |
652
+ | AC items with prior status `not_met` | **Never skip** | Always re-evaluate |
653
+ | AC items with prior status `partially_met` | **Never skip** | Always re-evaluate |
654
+ | AC items with prior status `pending`/`blocked` | **Never skip** | Always re-evaluate |
655
+
656
+ **AC Re-evaluation Rules:**
657
+
658
+ When `INCREMENTAL_MODE=true`:
659
+
660
+ 1. **Load prior AC statuses** from run context:
661
+ ```bash
662
+ # Extract AC statuses from prior context
663
+ ac_statuses=$(echo "$prior_context" | jq -r '.acStatuses | to_entries[] | "\(.key)=\(.value)"')
664
+ ```
665
+
666
+ 2. **For each AC item:**
667
+ - If prior status is `met` AND `NO_FILE_CHANGES=true`:
668
+ - **Skip full re-evaluation** — output "Cached: previously MET, no file changes"
669
+ - Mark as `MET (cached)` in output
670
+ - If prior status is `met` AND files changed:
671
+ - **Re-evaluate** — changes may have caused regression
672
+ - If prior status is `not_met` or `partially_met`:
673
+ - **Always re-evaluate** — this is the primary purpose of re-runs
674
+ - If prior status is `pending` or `blocked`:
675
+ - **Always re-evaluate** — status may have changed
676
+
677
+ 3. **`--no-cache` flag behavior:**
678
+ - When `--no-cache` is passed, set `INCREMENTAL_MODE=false`
679
+ - This forces full re-evaluation of ALL checks and AC items
680
+ - Run context is still saved at the end for future re-runs
681
+
682
+ **Output Format (Incremental QA Summary):**
683
+
684
+ When `INCREMENTAL_MODE=true`, prepend this section to the QA output:
685
+
686
+ ```markdown
687
+ ### Incremental QA Summary
688
+
689
+ **Last QA:** <timestamp> (commit: <sha-short>)
690
+ **Changes since last QA:** N files
691
+
692
+ | Check / AC | Status | Re-run? | Reason |
693
+ |------------|--------|---------|--------|
694
+ | type-safety | PASS | Cached | Diff hash unchanged |
695
+ | security | PASS | Cached | Diff hash unchanged |
696
+ | build | PASS | Re-run | Always fresh |
697
+ | CI status | PASS | Re-run | Always fresh |
698
+ | AC-1 | MET | Cached | Previously MET, no file changes |
699
+ | AC-2 | MET | Re-evaluated | Was NOT_MET |
700
+ | AC-3 | MET | Re-evaluated | Files changed since last QA |
701
+
702
+ **Summary:** X checks cached, Y re-evaluated, Z always-fresh
703
+ ```
704
+
705
+ **Run Context Persistence:**
706
+
707
+ After QA completes (regardless of incremental mode), save the run context:
708
+
709
+ ```bash
710
+ # Get current HEAD SHA
711
+ current_sha=$(git rev-parse HEAD)
712
+ # Get current diff hash
713
+ current_hash=$(npx tsx scripts/qa/qa-cache-cli.ts hash)
714
+
715
+ # Build AC statuses JSON from QA results
716
+ # Example: {"AC-1":"met","AC-2":"not_met","AC-3":"met"}
717
+ ac_json='{"AC-1":"met","AC-2":"not_met"}' # Replace with actual results
718
+
719
+ # Save run context
720
+ echo "{
721
+ \"lastQACommitSHA\": \"$current_sha\",
722
+ \"lastQADiffHash\": \"$current_hash\",
723
+ \"acStatuses\": $ac_json,
724
+ \"timestamp\": \"$(date -u +%Y-%m-%dT%H:%M:%S.000Z)\"
725
+ }" | npx tsx scripts/qa/qa-cache-cli.ts set-run-context
726
+ ```
727
+
728
+ ---
729
+
563
730
  ### Phase 1: CI Status Check — REQUIRED
564
731
 
565
732
  **Purpose:** Check GitHub CI status before finalizing verdict. CI-dependent AC items (e.g., "Tests pass in CI") should reflect actual CI status, not just local test results.
@@ -666,12 +833,110 @@ CI status affects the final verdict through the standard verdict algorithm:
666
833
 
667
834
  ---
668
835
 
836
+ ### Small-Diff Fast Path (Size Gate)
837
+
838
+ **Purpose:** Skip sub-agent spawning for trivial diffs to save ~30s latency and reduce token cost.
839
+
840
+ **Evaluate the size gate BEFORE spawning any quality check sub-agents:**
841
+
842
+ ```bash
843
+ # 1. Read threshold from settings (default: 100)
844
+ threshold=$(cat .sequant/settings.json 2>/dev/null | grep -o '"smallDiffThreshold"[[:space:]]*:[[:space:]]*[0-9]*' | grep -o '[0-9]*$' || echo "100")
845
+ if [ -z "$threshold" ]; then threshold=100; fi
846
+
847
+ # 2. Compute diff size (additions + deletions)
848
+ diff_stats=$(git diff origin/main...HEAD --stat | tail -1 || true)
849
+ additions=$(echo "$diff_stats" | grep -o '[0-9]* insertion' | grep -o '[0-9]*' || echo "0")
850
+ deletions=$(echo "$diff_stats" | grep -o '[0-9]* deletion' | grep -o '[0-9]*' || echo "0")
851
+ total_changes=$((${additions:-0} + ${deletions:-0}))
852
+
853
+ # 3. Check if package.json changed
854
+ pkg_changed=$(git diff origin/main...HEAD --name-only | grep -c '^package\.json$' || true)
855
+
856
+ # 4. Check security-sensitive paths (reuses existing heuristic from anti-pattern detection)
857
+ security_paths=$(git diff origin/main...HEAD --name-only | grep -iE 'auth|payment|security|server-action|middleware|admin' || true)
858
+ security_sensitive="false"
859
+ if [ -n "$security_paths" ]; then security_sensitive="true"; fi
860
+
861
+ echo "Size gate: $total_changes lines changed (threshold: $threshold), pkg_changed=$pkg_changed, security=$security_sensitive"
862
+ ```
863
+
864
+ **Size gate decision:**
865
+
866
+ | Condition | Result |
867
+ |-----------|--------|
868
+ | `total_changes < threshold` AND `pkg_changed == 0` AND `security_sensitive == false` | `SMALL_DIFF=true` — use inline checks |
869
+ | Any condition fails | `SMALL_DIFF=false` — use sub-agents (standard pipeline) |
870
+ | Size gate evaluation errors (e.g., git fails) | `SMALL_DIFF=false` — fall back to full pipeline (AC-5) |
871
+
872
+ **Log the decision (AC-6):**
873
+
874
+ ```markdown
875
+ ### Size Gate
876
+
877
+ | Check | Value |
878
+ |-------|-------|
879
+ | Diff size | N lines (threshold: T) |
880
+ | package.json changed | Yes/No |
881
+ | Security-sensitive paths | Yes/No [list if yes] |
882
+ | Decision | **Inline checks** / **Sub-agents** |
883
+ ```
884
+
885
+ #### If `SMALL_DIFF=true`: Inline Quality Checks
886
+
887
+ Run these checks directly (no sub-agents needed):
888
+
889
+ ```bash
890
+ # Type safety: check for 'any' additions
891
+ any_count=$(git diff origin/main...HEAD | grep '^\+' | grep -v '^\+\+\+' | grep -cw 'any' || true)
892
+
893
+ # Deleted tests check
894
+ deleted_tests=$(git diff origin/main...HEAD --name-only --diff-filter=D | grep -cE '\.(test|spec)\.' || true)
895
+
896
+ # Scope: files changed count
897
+ files_changed=$(git diff origin/main...HEAD --name-only | wc -l | tr -d ' ')
898
+
899
+ # Security scan (lightweight — just check for obvious patterns in added lines)
900
+ security_issues=$(git diff origin/main...HEAD | grep '^\+' | grep -v '^\+\+\+' | grep -ciE "eval\(|innerHTML|dangerouslySetInnerHTML|exec\(|password.*=.*[\"']|secret.*=.*[\"']|api.?key.*=.*[\"']" || true)
901
+
902
+ echo "Inline checks: any=$any_count, deleted_tests=$deleted_tests, files=$files_changed, security_issues=$security_issues"
903
+ ```
904
+
905
+ **After inline checks, skip to the output template** (the sub-agent section below is not executed).
906
+
907
+ #### If `SMALL_DIFF=false`: Use Sub-Agents (Standard Pipeline)
908
+
909
+ Proceed to the standard Quality Checks section below.
910
+
911
+ ---
912
+
669
913
  ### Quality Checks (Multi-Agent) — REQUIRED
670
914
 
671
- **You MUST spawn sub-agents for quality checks.** Do NOT run these checks inline with bash commands. Sub-agents provide parallel execution, better context isolation, and consistent reporting.
915
+ **When `SMALL_DIFF=false`**, you MUST spawn sub-agents for quality checks. Do NOT run these checks inline with bash commands. Sub-agents provide parallel execution, better context isolation, and consistent reporting.
672
916
 
673
917
  **Execution mode:** Respect the agent execution mode determined above (see "Agent Execution Mode" section).
674
918
 
919
+ #### Documentation Issue Detection
920
+
921
+ Check if this is a documentation-only issue by reading the `SEQUANT_ISSUE_TYPE` environment variable:
922
+
923
+ ```bash
924
+ issue_type="${SEQUANT_ISSUE_TYPE:-}"
925
+ ```
926
+
927
+ **If `SEQUANT_ISSUE_TYPE=docs`**, use the lighter docs QA pipeline:
928
+
929
+ - **Skip** type safety sub-agent (no TypeScript changes expected)
930
+ - **Skip** security scan sub-agent (no runtime code changes)
931
+ - **Keep** scope/size check (still useful for docs)
932
+ - **Focus review on:** content accuracy, completeness, formatting, and link validity
933
+
934
+ **Docs QA sub-agents (1 agent instead of 3):**
935
+
936
+ 1. `Task(subagent_type="general-purpose", model="haiku", mode="bypassPermissions", prompt="Run scope and size checks on the current branch vs main. Check for broken links in changed markdown files. Report: files count, diff size, broken links, size assessment.")`
937
+
938
+ **If `SEQUANT_ISSUE_TYPE` is not set or is not `docs`**, use the standard pipeline below.
939
+
675
940
  #### If parallel mode enabled:
676
941
 
677
942
  **Spawn ALL THREE agents in a SINGLE message (one Tool call per agent, all in same response):**
@@ -889,6 +1154,7 @@ changed_files=$(git diff main...HEAD --name-only | grep -E '\.(ts|tsx|js|jsx)$'
889
1154
  | Error Handling | Empty catch block | ⚠️ Medium |
890
1155
  | Security | Hardcoded secrets | ❌ High |
891
1156
  | Security | SQL concatenation | ❌ High |
1157
+ | Security | Server binds all interfaces (`0.0.0.0`) | ❌ High |
892
1158
  | Memory | Uncleared interval/timeout | ⚠️ Medium |
893
1159
  | A11y | Image without alt | ⚠️ Low |
894
1160
 
@@ -903,7 +1169,61 @@ changed_files=$(git diff main...HEAD --name-only | grep -E '\.(ts|tsx|js|jsx)$'
903
1169
 
904
1170
  See [anti-pattern-detection.md](references/anti-pattern-detection.md) for detection commands and full criteria.
905
1171
 
906
- ### 2f. Call-Site Review (When New Functions Added)
1172
+ ### 2f. Product Review (When New User-Facing Features Added)
1173
+
1174
+ **When to apply:** New CLI commands, MCP tools, configuration options, or other features that end users interact with directly.
1175
+
1176
+ **Detection:**
1177
+ ```bash
1178
+ # Detect user-facing changes
1179
+ cli_added=$(git diff main...HEAD -- bin/cli.ts | grep -E '^\+.*\.command\(' | wc -l | xargs || true)
1180
+ new_commands=$(git diff main...HEAD --name-only | grep -E '^src/commands/' | wc -l | xargs || true)
1181
+ mcp_added=$(git diff main...HEAD --name-only | grep -E '^src/mcp/' | wc -l | xargs || true)
1182
+ config_changed=$(git diff main...HEAD --name-only | grep -E 'settings|config' | wc -l | xargs || true)
1183
+
1184
+ if [[ $((cli_added + new_commands + mcp_added + config_changed)) -gt 0 ]]; then
1185
+ echo "User-facing changes detected - running product review"
1186
+ fi
1187
+ ```
1188
+
1189
+ **If user-facing changes detected, answer these questions:**
1190
+
1191
+ | Question | What to check |
1192
+ |----------|---------------|
1193
+ | **First-time setup:** Can a new user go from zero to working? | List every prerequisite. Try the setup path mentally. |
1194
+ | **Per-environment differences:** Does this work the same everywhere? | macOS/Linux/Windows, different clients/tools, CI vs local |
1195
+ | **What does the user see?** | Walk through the actual UX — wait times, output format, progress indicators |
1196
+ | **What happens after?** | Where's the output? What does the user do next? |
1197
+ | **Failure modes the user will hit:** | Not code edge cases — real scenarios (wrong directory, missing auth, timeout) |
1198
+
1199
+ **Output Format:**
1200
+
1201
+ ```markdown
1202
+ ### Product Review
1203
+
1204
+ **User-facing changes:** [list new commands/tools/options]
1205
+
1206
+ | Question | Finding |
1207
+ |----------|---------|
1208
+ | First-time setup | [All prerequisites identified? Setup path clear?] |
1209
+ | Per-environment | [Any client/platform differences?] |
1210
+ | User sees | [Wait times, output format, progress] |
1211
+ | After completion | [Where output goes, next steps] |
1212
+ | Likely failure modes | [Real user scenarios] |
1213
+
1214
+ **Gaps found:** [list any gaps, or "None"]
1215
+ ```
1216
+
1217
+ **Verdict Impact:**
1218
+
1219
+ | Finding | Verdict Impact |
1220
+ |---------|----------------|
1221
+ | No gaps | No impact |
1222
+ | Missing prerequisites in docs | `AC_MET_BUT_NOT_A_PLUS` |
1223
+ | Feature silently fails in common environment (e.g., wrong cwd, missing auth) | `AC_NOT_MET` |
1224
+ | Poor UX but functional | Note in findings |
1225
+
1226
+ ### 2g. Call-Site Review (When New Functions Added)
907
1227
 
908
1228
  **When to apply:** New exported functions are detected in the diff.
909
1229
 
@@ -1019,7 +1339,7 @@ If the function accepts configuration or mode options:
1019
1339
 
1020
1340
  See [call-site-review.md](references/call-site-review.md) for detailed methodology and examples.
1021
1341
 
1022
- ### 2g. CLI Registration Verification (When Option Interfaces Modified)
1342
+ ### 2h. CLI Registration Verification (When Option Interfaces Modified)
1023
1343
 
1024
1344
  **When to apply:** `RunOptions` or similar CLI option interfaces are modified in the diff.
1025
1345
 
@@ -1139,6 +1459,23 @@ For each AC item, mark as:
1139
1459
 
1140
1460
  Provide a sentence or two explaining why.
1141
1461
 
1462
+ #### AC Literal Verification (REQUIRED)
1463
+
1464
+ **Before marking any AC as MET**, verify the implementation matches the AC text literally, not just in spirit:
1465
+
1466
+ 1. **Extract specific technical claims** from the AC text (commands, flags, function names, config keys, UI elements)
1467
+ 2. **Search the implementation** for each claim using Grep or Read — do not assume presence
1468
+ 3. **If the AC mentions a flag** (e.g., `--file <relevant-files>`), verify that flag appears in the code
1469
+ 4. **If the AC says "works end-to-end"**, trace the full call chain from entry point to execution
1470
+
1471
+ **Example:** If AC says *"shells out to `aider --yes --no-auto-commits --message '<prompt>' --file <relevant-files>`"*:
1472
+ - Verify `--yes` is in args array ✅
1473
+ - Verify `--no-auto-commits` is in args array ✅
1474
+ - Verify `--message` is in args array ✅
1475
+ - Verify `--file` is in args array — **if missing, AC is NOT MET** ❌
1476
+
1477
+ Do NOT mark MET based on "the general intent is satisfied." The AC text is the contract — verify it literally.
1478
+
1142
1479
  ### 3a. AC Status Persistence — REQUIRED
1143
1480
 
1144
1481
  **After evaluating each AC item**, update the status in workflow state using the state CLI:
@@ -1201,6 +1538,7 @@ See [testing-requirements.md](references/testing-requirements.md) for edge case
1201
1538
  2. "Do the tests actually test the feature's primary purpose, or just pass?"
1202
1539
  3. "What's the most likely way this feature could break in production?"
1203
1540
  4. "Am I giving a positive verdict because the code looks clean, or because I verified it works?"
1541
+ 5. "Are there 'design choices' I'm excusing that are actually bad practices?" (e.g., no version pinning, leaking secrets to unnecessary env vars, non-portable shell in example code, no input validation). Would I accept this in a code review from a junior developer?
1204
1542
 
1205
1543
  **Include this section in your output:**
1206
1544
 
@@ -1819,7 +2157,34 @@ npx tsx scripts/state/update.ts fail <issue-number> qa "AC not met"
1819
2157
 
1820
2158
  **Before responding, verify your output includes ALL of these:**
1821
2159
 
1822
- ### Standard QA (Implementation Exists)
2160
+ ### Simple Fix Mode (`SMALL_DIFF=true`)
2161
+
2162
+ When the size gate determined `SMALL_DIFF=true`, use the **simplified output template**. The following sections are **omitted** (not marked N/A — completely absent):
2163
+
2164
+ - Quality Plan Verification
2165
+ - Incremental QA Summary
2166
+ - Call-Site Review
2167
+ - Product Review
2168
+ - Smoke Test
2169
+ - CLI Registration Verification
2170
+ - Skill Command Verification
2171
+ - Script Verification Override
2172
+ - Skill Change Review
2173
+
2174
+ **Required sections for simple fix mode:**
2175
+
2176
+ - [ ] **Size Gate** - Size gate decision table with threshold, diff size, and decision
2177
+ - [ ] **AC Coverage** - Each AC item marked as MET, PARTIALLY_MET, NOT_MET, PENDING, or N/A
2178
+ - [ ] **Quality Metrics** - Type issues, deleted tests, files changed, additions/deletions (from inline checks)
2179
+ - [ ] **Code Review Findings** - Strengths, issues, suggestions
2180
+ - [ ] **Test Coverage Analysis** - Changed files with/without tests, critical paths flagged
2181
+ - [ ] **Anti-Pattern Detection** - Code patterns check (lightweight)
2182
+ - [ ] **Self-Evaluation Completed** - Adversarial self-evaluation section included
2183
+ - [ ] **Verdict** - One of: READY_FOR_MERGE, AC_MET_BUT_NOT_A_PLUS, NEEDS_VERIFICATION, AC_NOT_MET
2184
+ - [ ] **Documentation Check** - README/docs updated if feature adds new functionality
2185
+ - [ ] **Next Steps** - Clear, actionable recommendations
2186
+
2187
+ ### Standard QA (Implementation Exists, `SMALL_DIFF=false`)
1823
2188
 
1824
2189
  - [ ] **Self-Evaluation Completed** - Adversarial self-evaluation section included in output
1825
2190
  - [ ] **AC Coverage** - Each AC item marked as MET, PARTIALLY_MET, NOT_MET, PENDING, or N/A
@@ -1856,6 +2221,103 @@ When early exit is triggered (no commits, no uncommitted changes, no PR):
1856
2221
 
1857
2222
  ## Output Template
1858
2223
 
2224
+ ### Simple Fix Template (`SMALL_DIFF=true`)
2225
+
2226
+ When the size gate triggers simple fix mode, use this shorter template:
2227
+
2228
+ ```markdown
2229
+ ## QA Review for Issue #<N> (Simple Fix)
2230
+
2231
+ ### Size Gate
2232
+
2233
+ | Check | Value |
2234
+ |-------|-------|
2235
+ | Diff size | N lines (threshold: T) |
2236
+ | package.json changed | No |
2237
+ | Security-sensitive paths | No |
2238
+ | Decision | **Inline checks** |
2239
+
2240
+ ### AC Coverage
2241
+
2242
+ | AC | Description | Status | Notes |
2243
+ |----|-------------|--------|-------|
2244
+ | AC-1 | [description] | MET/PARTIALLY_MET/NOT_MET/PENDING/N/A | [explanation] |
2245
+
2246
+ **Coverage:** X/Y AC items fully met
2247
+
2248
+ ---
2249
+
2250
+ ### Quality Metrics
2251
+
2252
+ | Metric | Value | Status |
2253
+ |--------|-------|--------|
2254
+ | Type issues (`any`) | X | OK/WARN |
2255
+ | Deleted tests | X | OK/WARN |
2256
+ | Files changed | X | OK/WARN |
2257
+ | Lines added | +X | - |
2258
+ | Lines deleted | -X | - |
2259
+ | Security patterns | X | OK/WARN |
2260
+
2261
+ ---
2262
+
2263
+ ### Code Review
2264
+
2265
+ **Strengths:**
2266
+ - [Positive findings]
2267
+
2268
+ **Issues:**
2269
+ - [Problems found]
2270
+
2271
+ **Suggestions:**
2272
+ - [Improvements recommended]
2273
+
2274
+ ---
2275
+
2276
+ ### Test Coverage Analysis
2277
+
2278
+ | Changed File | Tier | Has Tests? | Test File |
2279
+ |--------------|------|------------|-----------|
2280
+ | `[file]` | Critical/Standard/Optional | Yes/No | `[test file or -]` |
2281
+
2282
+ **Coverage:** X/Y changed source files have corresponding tests
2283
+
2284
+ ---
2285
+
2286
+ ### Anti-Pattern Detection
2287
+
2288
+ | File:Line | Category | Pattern | Suggestion |
2289
+ |-----------|----------|---------|------------|
2290
+ | [location] | [category] | [pattern] | [fix] |
2291
+
2292
+ ---
2293
+
2294
+ ### Self-Evaluation
2295
+
2296
+ - **Verified working:** [Yes/No]
2297
+ - **Test efficacy:** [High/Medium/Low]
2298
+ - **Likely failure mode:** [description]
2299
+ - **Verdict confidence:** [High/Medium/Low]
2300
+
2301
+ ---
2302
+
2303
+ ### Verdict: [READY_FOR_MERGE | AC_MET_BUT_NOT_A_PLUS | NEEDS_VERIFICATION | AC_NOT_MET]
2304
+
2305
+ [Explanation of verdict]
2306
+
2307
+ ### Documentation
2308
+
2309
+ - [ ] N/A - Simple fix, no documentation needed
2310
+ - [ ] README/docs updated
2311
+
2312
+ ### Next Steps
2313
+
2314
+ 1. [Action item]
2315
+ ```
2316
+
2317
+ ---
2318
+
2319
+ ### Standard Template (`SMALL_DIFF=false`)
2320
+
1859
2321
  You MUST include these sections:
1860
2322
 
1861
2323
  ```markdown
@@ -1895,6 +2357,21 @@ You MUST include these sections:
1895
2357
 
1896
2358
  ---
1897
2359
 
2360
+ ### Incremental QA Summary
2361
+
2362
+ [Include if INCREMENTAL_MODE=true from Phase 0c, otherwise: "N/A - First QA run"]
2363
+
2364
+ **Last QA:** <timestamp> (commit: <sha-short>)
2365
+ **Changes since last QA:** N files
2366
+
2367
+ | Check / AC | Status | Re-run? | Reason |
2368
+ |------------|--------|---------|--------|
2369
+ | [check/AC] | [status] | Cached / Re-run / Re-evaluated | [reason] |
2370
+
2371
+ **Summary:** X checks cached, Y re-evaluated, Z always-fresh
2372
+
2373
+ ---
2374
+
1898
2375
  ### CI Status
1899
2376
 
1900
2377
  [Include if PR exists, otherwise: "No PR exists yet" or "No CI configured"]
@@ -369,7 +369,7 @@ if [[ -n "$shell_scripts" ]]; then
369
369
  unused_count=0
370
370
  for func in $funcs; do
371
371
  # Count calls (excluding the definition line)
372
- call_count=$(grep -c "\b${func}\b" "$script" 2>/dev/null || echo "0")
372
+ call_count=$(grep -c "\b${func}\b" "$script" 2>/dev/null || true)
373
373
  if [[ $call_count -lt 2 ]]; then # Only definition, no calls
374
374
  echo " ⚠️ Function '$func' defined but possibly not called"
375
375
  unused_count=$((unused_count + 1))