npm - sequant - Versions diffs - 2.1.1 → 2.2.0 - Mend

sequant 2.1.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

package/.claude-plugin/marketplace.json +1 -1
package/.claude-plugin/plugin.json +1 -1
package/dist/bin/cli.js +1 -0
package/dist/src/commands/init.d.ts +1 -0
package/dist/src/commands/init.js +122 -3
package/dist/src/commands/run-compat.d.ts +14 -0
package/dist/src/commands/run-compat.js +12 -0
package/dist/src/commands/run-display.d.ts +17 -0
package/dist/src/commands/run-display.js +116 -0
package/dist/src/commands/run.d.ts +4 -26
package/dist/src/commands/run.js +47 -772
package/dist/src/commands/status.js +24 -1
package/dist/src/index.d.ts +11 -0
package/dist/src/index.js +9 -0
package/dist/src/lib/errors.d.ts +93 -0
package/dist/src/lib/errors.js +97 -0
package/dist/src/lib/settings.d.ts +236 -0
package/dist/src/lib/settings.js +482 -37
package/dist/src/lib/skill-version.d.ts +19 -0
package/dist/src/lib/skill-version.js +68 -0
package/dist/src/lib/templates.d.ts +1 -0
package/dist/src/lib/templates.js +1 -1
package/dist/src/lib/workflow/batch-executor.js +13 -5
package/dist/src/lib/workflow/config-resolver.d.ts +50 -0
package/dist/src/lib/workflow/config-resolver.js +167 -0
package/dist/src/lib/workflow/error-classifier.d.ts +17 -7
package/dist/src/lib/workflow/error-classifier.js +113 -15
package/dist/src/lib/workflow/phase-executor.d.ts +31 -0
package/dist/src/lib/workflow/phase-executor.js +143 -48
package/dist/src/lib/workflow/run-log-schema.d.ts +12 -0
package/dist/src/lib/workflow/run-log-schema.js +7 -1
package/dist/src/lib/workflow/run-orchestrator.d.ts +161 -0
package/dist/src/lib/workflow/run-orchestrator.js +510 -0
package/dist/src/lib/workflow/worktree-manager.d.ts +4 -3
package/dist/src/lib/workflow/worktree-manager.js +61 -11
package/package.json +1 -1
package/templates/skills/assess/SKILL.md +239 -77
package/templates/skills/exec/SKILL.md +7 -68
package/templates/skills/fullsolve/SKILL.md +303 -137
package/templates/skills/qa/SKILL.md +42 -46
package/templates/skills/qa/scripts/quality-checks.sh +47 -1
package/templates/skills/spec/SKILL.md +183 -982
package/templates/skills/spec/references/quality-checklist.md +75 -0
package/templates/skills/test/SKILL.md +0 -27
package/templates/skills/testgen/SKILL.md +0 -27

package/templates/skills/qa/SKILL.md CHANGED Viewed

@@ -122,10 +122,23 @@ Include this marker in every `gh issue comment` that represents QA completion.
 Invocation:
 - `/qa 123`: Treat `123` as the GitHub issue/PR identifier in context.
+- `/qa 123 172`: Treat both as issue numbers — process each sequentially.
 - `/qa <freeform description>`: Treat the text as context about the change to review.
 - `/qa 123 --parallel`: Force parallel agent execution (faster, higher token usage).
 - `/qa 123 --sequential`: Force sequential agent execution (slower, lower token usage).
+### Multi-Issue Invocation
+When multiple issue numbers are provided (e.g., `/qa 167 172`):
+1. **Parse all issue numbers** from args
+2. **Process each issue sequentially** with inline code review — do NOT spawn ad-hoc background agents for the diff reading or AC verification portions
+3. The built-in `sequant-qa-checker` sub-agents (type safety, scope, security) continue to run per the size gate rules for each issue
+4. Each issue gets its own full QA cycle: context fetch → diff review → quality checks → verdict → comment
+5. Post a **separate QA comment** to each issue's GitHub thread
+**Why sequential with inline review:** Ad-hoc background agents for code review are unreliable — they hallucinate about file existence, misattribute API patterns, and hit permission issues on worktree reads. The narrowly-scoped `sequant-qa-checker` agents work well because they have specific, bounded tasks. The code review portion must stay inline for accuracy.
 ### Agent Execution Mode
 Before spawning quality check agents, determine the execution mode:
@@ -758,21 +771,21 @@ echo "Size gate: $total_changes lines changed (threshold: $threshold), pkg_chang
 Run these checks directly (no sub-agents needed):
-```bash
-# Type safety: check for 'any' additions
-any_count=$(git diff origin/main...HEAD | grep '^\+' | grep -v '^\+\+\+' | grep -cw 'any' || true)
+**IMPORTANT:** Use the Grep tool (not bash `grep`) for pattern matching — bash grep uses BSD regex on macOS which is incompatible with some patterns below. The Grep tool uses ripgrep which works cross-platform.
+```bash
 # Deleted tests check
 deleted_tests=$(git diff origin/main...HEAD --name-only --diff-filter=D | grep -cE '\.(test|spec)\.' || true)
 # Scope: files changed count
 files_changed=$(git diff origin/main...HEAD --name-only | wc -l | tr -d ' ')
+```
-# Security scan (lightweight — just check for obvious patterns in added lines)
-security_issues=$(git diff origin/main...HEAD | grep '^\+' | grep -v '^\+\+\+' | grep -ciE 'eval\(|innerHTML|dangerouslySetInnerHTML|exec\(|password.*=.*["']|secret.*=.*["']|api.?key.*=.*["']' || true)
+For type safety and security scans, use the Grep tool instead of bash:
+- **Type safety:** `Grep(pattern=":\\s*any[,;)\\]]|as any", path="<changed-files>")` on added lines
+- **Security scan:** `Grep(pattern="eval\\(|innerHTML|dangerouslySetInnerHTML|password.*=.*[\"']|secret.*=.*[\"']", path="<changed-files>")` on added lines
-echo "Inline checks: any=$any_count, deleted_tests=$deleted_tests, files=$files_changed, security_issues=$security_issues"
-```
+Count results from the Grep tool output to get `any_count` and `security_issues`.
 **After inline checks, skip to the output template** (the sub-agent section below is not executed).
@@ -838,6 +851,12 @@ issue_type="${SEQUANT_ISSUE_TYPE:-}"
 admin_modified=$(git diff main...HEAD --name-only | grep -E "^app/admin/" | head -1 || true)
 ```
+**Add skill sync check if skill files modified:**
+```bash
+skill_modified=$(git diff main...HEAD --name-only | grep -E "^(\.claude/skills|skills|templates/skills)/" | head -1 || true)
+```
+If skill files are modified, the quality-checks.sh script automatically runs the three-directory sync check (section 11.5). If divergence is detected, this blocks `READY_FOR_MERGE` — verdict becomes `AC_MET_BUT_NOT_A_PLUS` with a note to run `npx tsx scripts/check-skill-sync.ts --fix`.
 See [quality-gates.md](references/quality-gates.md) for detailed verdict synthesis.
 ### Using MCP Tools (Optional)
@@ -1359,39 +1378,20 @@ Before any READY_FOR_MERGE verdict, complete the adversarial thinking checklist:
 See [testing-requirements.md](references/testing-requirements.md) for edge case checklists.
-### 5. Adversarial Self-Evaluation (REQUIRED)
+### 5. Risk Assessment (REQUIRED unless SMALL_DIFF)
-**Before issuing your verdict**, you MUST complete this adversarial self-evaluation to catch issues that automated quality checks miss.
-**Why this matters:** QA automation catches type issues, deleted tests, and scope creep - but misses:
-- Features that don't actually work as expected
-- Tests that pass but don't test the right things
-- Edge cases only apparent when actually using the feature
-**Answer these questions honestly:**
-1. "Did the implementation actually work when I reviewed it, or am I assuming it works?"
-2. "Do the tests actually test the feature's primary purpose, or just pass?"
-3. "What's the most likely way this feature could break in production?"
-4. "Am I giving a positive verdict because the code looks clean, or because I verified it works?"
-5. "Are there 'design choices' I'm excusing that are actually bad practices?" (e.g., no version pinning, leaking secrets to unnecessary env vars, non-portable shell in example code, no input validation). Would I accept this in a code review from a junior developer?
+**Before issuing your verdict**, state the implementation risks in 2-3 sentences.
 **Include this section in your output:**
 ```markdown
-### Self-Evaluation
+### Risk Assessment
-- **Verified working:** [Yes/No - did you actually verify the feature works, or assume it does?]
-- **Test efficacy:** [High/Medium/Low - do tests catch the feature breaking?]
-- **Likely failure mode:** [What would most likely break this in production?]
-- **Verdict confidence:** [High/Medium/Low - explain any uncertainty]
+- **Likely failure mode:** [How would this break in production? Be specific.]
+- **Not tested:** [What gaps exist in test coverage for these changes?]
 ```
-**If any answer reveals concerns:**
-- Factor the concerns into your verdict
-- If significant, change verdict to `AC_NOT_MET` or `AC_MET_BUT_NOT_A_PLUS`
-- Document the concerns in the QA comment
-**Do NOT skip this self-evaluation.** Honest reflection catches issues that code review cannot.
+**If either field reveals significant concerns**, factor them into your verdict. A serious failure mode with no test coverage should downgrade to `AC_MET_BUT_NOT_A_PLUS` or `AC_NOT_MET`.
 #### Skill Change Review (Conditional)
@@ -1402,7 +1402,7 @@ See [testing-requirements.md](references/testing-requirements.md) for edge case
 skills_changed=$(git diff main...HEAD --name-only | grep -E "^\.claude/skills/.*\.md$" | wc -l | xargs || true)
 ```
-**If skills_changed > 0, add these adversarial prompts:**
+**If skills_changed > 0, add these verification prompts:**
 | Prompt | Why It Matters |
 |--------|----------------|
@@ -1985,14 +1985,14 @@ When the size gate determined `SMALL_DIFF=true`, use the **simplified output tem
 - [ ] **Code Review Findings** - Strengths, issues, suggestions
 - [ ] **Test Coverage Analysis** - Changed files with/without tests, critical paths flagged
 - [ ] **Anti-Pattern Detection** - Code patterns check (lightweight)
-- [ ] **Self-Evaluation Completed** - Adversarial self-evaluation section included
+- [ ] **Risk Assessment** - Likely failure mode and coverage gaps stated
 - [ ] **Verdict** - One of: READY_FOR_MERGE, AC_MET_BUT_NOT_A_PLUS, NEEDS_VERIFICATION, AC_NOT_MET
 - [ ] **Documentation Check** - README/docs updated if feature adds new functionality
 - [ ] **Next Steps** - Clear, actionable recommendations
 ### Standard QA (Implementation Exists, `SMALL_DIFF=false`)
-- [ ] **Self-Evaluation Completed** - Adversarial self-evaluation section included in output
+- [ ] **Risk Assessment** - Likely failure mode and coverage gaps stated in output
 - [ ] **AC Coverage** - Each AC item marked as MET, PARTIALLY_MET, NOT_MET, PENDING, or N/A
 - [ ] **Quality Plan Verification** - Included if quality plan exists (or marked N/A if no quality plan)
 - [ ] **CI Status** - Included if PR exists (or marked "No PR" / "No CI configured")
@@ -2008,7 +2008,7 @@ When the size gate determined `SMALL_DIFF=true`, use the **simplified output tem
 - [ ] **Execution Evidence** - Included if scripts/CLI modified (or marked N/A)
 - [ ] **Script Verification Override** - Included if scripts/CLI modified AND /verify was skipped (with justification and risk assessment)
 - [ ] **Skill Command Verification** - Included if `.claude/skills/**/*.md` modified (or marked N/A)
-- [ ] **Skill Change Review** - Skill-specific adversarial prompts included if skills changed
+- [ ] **Skill Change Review** - Skill-specific verification prompts included if skills changed
 - [ ] **Smoke Test** - Included if workflow-affecting changes (skills, scripts, CLI), or marked "Not Required"
 - [ ] **CHANGELOG Verification** - User-facing changes have `[Unreleased]` entry (or marked N/A)
 - [ ] **Documentation Check** - README/docs updated if feature adds new functionality
@@ -2097,12 +2097,10 @@ When the size gate triggers simple fix mode, use this shorter template:
 ---
-### Self-Evaluation
+### Risk Assessment
-- **Verified working:** [Yes/No]
-- **Test efficacy:** [High/Medium/Low]
-- **Likely failure mode:** [description]
-- **Verdict confidence:** [High/Medium/Low]
+- **Likely failure mode:** [How would this break in production?]
+- **Not tested:** [What gaps exist in test coverage?]
 ---
@@ -2387,12 +2385,10 @@ You MUST include these sections:
 ---
-### Self-Evaluation
+### Risk Assessment
-- **Verified working:** [Yes/No - did you actually verify the feature works?]
-- **Test efficacy:** [High/Medium/Low - do tests catch the feature breaking?]
-- **Likely failure mode:** [What would most likely break this in production?]
-- **Verdict confidence:** [High/Medium/Low - explain any uncertainty]
+- **Likely failure mode:** [How would this break in production? Be specific.]
+- **Not tested:** [What gaps exist in test coverage for these changes?]
 ---

package/templates/skills/qa/scripts/quality-checks.sh CHANGED Viewed

@@ -385,7 +385,53 @@ else
 fi
 # =============================================================================
-# 11. Build Verification (cacheable - expensive operation)
+# =============================================================================
+# 11.5. Skill Sync Check (when skill files modified)
+# =============================================================================
+echo ""
+skill_files_changed=$(git diff main...HEAD --name-only | grep -E '^\.(claude/skills|skills|templates/skills)/' || true)
+if [[ -n "$skill_files_changed" ]]; then
+  echo "🔍 Checking three-directory skill sync..."
+  if [[ -f "scripts/check-skill-sync.ts" ]]; then
+    sync_output=$(npx tsx scripts/check-skill-sync.ts 2>&1 || true)
+    sync_exit=$?
+    sync_summary=$(echo "$sync_output" | grep "^Summary:" || true)
+    if [[ $sync_exit -ne 0 ]]; then
+      echo "⚠️  Skill sync: DIVERGENCE DETECTED"
+      echo "$sync_summary"
+      echo "   Run: npx tsx scripts/check-skill-sync.ts --fix"
+    else
+      echo "✅ Skill sync: All files synced across 3 directories"
+    fi
+  else
+    echo "   (scripts/check-skill-sync.ts not found — using inline diff)"
+    diverged=0
+    for f in $skill_files_changed; do
+      if [[ "$f" == .claude/skills/* ]]; then
+        rel="${f#.claude/skills/}"
+        for mirror in "templates/skills" "skills"; do
+          if [[ -f "${mirror}/${rel}" ]]; then
+            if ! diff -q ".claude/skills/${rel}" "${mirror}/${rel}" > /dev/null 2>&1; then
+              echo "   ⚠️  DIVERGED: ${rel} (.claude/skills vs ${mirror})"
+              diverged=$((diverged + 1))
+            fi
+          fi
+        done
+      fi
+    done
+    if [[ $diverged -eq 0 ]]; then
+      echo "✅ Skill sync: Changed skill files are synced"
+    else
+      echo "⚠️  Skill sync: ${diverged} file(s) diverged"
+      echo "   Fix: copy from .claude/skills/ to templates/skills/ and skills/"
+    fi
+  fi
+else
+  echo "🔍 Skill sync: No skill files changed (skipped)"
+fi
+# =============================================================================
+# 12. Build Verification (cacheable - expensive operation)
 # =============================================================================
 verify_build_against_main() {