npm - bigpowers - Versions diffs - 2.25.0 → 2.27.0 - Mend

bigpowers 2.25.0 → 2.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

package/.pi/package.json +2 -2
package/.pi/prompts/build-epic.md +22 -0
package/.pi/prompts/compose-workflow.md +9 -7
package/.pi/prompts/develop-tdd.md +20 -0
package/.pi/prompts/elaborate-spec.md +20 -1
package/.pi/prompts/evolve-skill.md +14 -7
package/.pi/prompts/kickoff-branch.md +26 -1
package/.pi/prompts/quick-fix.md +2 -0
package/.pi/prompts/run-benchmark.md +69 -0
package/.pi/prompts/run-planning.md +19 -0
package/.pi/prompts/scope-work.md +6 -0
package/.pi/prompts/slice-tasks.md +6 -0
package/.pi/prompts/stocktake-skills.md +3 -1
package/.pi/prompts/verify-work.md +30 -0
package/.pi/skills/build-epic/SKILL.md +22 -0
package/.pi/skills/compose-workflow/SKILL.md +9 -7
package/.pi/skills/develop-tdd/SKILL.md +20 -0
package/.pi/skills/elaborate-spec/SKILL.md +20 -1
package/.pi/skills/evolve-skill/SKILL.md +14 -7
package/.pi/skills/kickoff-branch/SKILL.md +26 -1
package/.pi/skills/quick-fix/SKILL.md +2 -0
package/.pi/skills/run-benchmark/SKILL.md +71 -0
package/.pi/skills/run-planning/SKILL.md +19 -0
package/.pi/skills/scope-work/SKILL.md +6 -0
package/.pi/skills/slice-tasks/SKILL.md +6 -0
package/.pi/skills/stocktake-skills/SKILL.md +3 -1
package/.pi/skills/verify-work/SKILL.md +30 -0
package/CHANGELOG.md +25 -0
package/SKILL-INDEX.md +2 -2
package/build-epic/SKILL.md +22 -0
package/compose-workflow/SKILL.md +9 -7
package/develop-tdd/SKILL.md +20 -0
package/elaborate-spec/SKILL.md +20 -1
package/evolve-skill/SKILL.md +14 -7
package/kickoff-branch/SKILL.md +26 -1
package/package.json +1 -1
package/quick-fix/SKILL.md +2 -0
package/run-benchmark/SKILL.md +70 -0
package/run-planning/SKILL.md +19 -0
package/scope-work/SKILL.md +6 -0
package/scripts/run-skill-verify.sh +57 -0
package/skills-lock.json +17 -12
package/slice-tasks/SKILL.md +6 -0
package/stocktake-skills/SKILL.md +3 -1
package/verify-work/SKILL.md +30 -0

package/skills-lock.json CHANGED Viewed

@@ -18,7 +18,7 @@
     },
     "build-epic": {
       "description": "Eight-step epic build cycle — reads state.yaml, execution-status.yaml, and one epic capsule; updates status via bp-yaml-set or direct edit. Resume mode runs one step per invocation. Use instead of ad-hoc execute-plan for release work.",
-      "sha256": "b7049084cd180053",
+      "sha256": "7a376ef092fde9cc",
       "path": "build-epic/SKILL.md"
     },
     "change-request": {
@@ -33,7 +33,7 @@
     },
     "compose-workflow": {
       "description": "Chain multiple bigpowers skills into a custom workflow recipe saved in specs/. Use when a project repeats a non-standard skill sequence, or user wants a documented playbook beyond orchestrate-project modes.",
-      "sha256": "4d498c0c6975cbce",
+      "sha256": "854e272ca7bd6e3a",
       "path": "compose-workflow/SKILL.md"
     },
     "craft-skill": {
@@ -73,7 +73,7 @@
     },
     "develop-tdd": {
       "description": "Test-driven development with red-green-refactor loop using vertical slices. Use for features (epic tasks) or bugs (specs/bugs/BUG-*.md).",
-      "sha256": "6ace47f7db4642d4",
+      "sha256": "e050f400da14eff3",
       "path": "develop-tdd/SKILL.md"
     },
     "diagnose-root": {
@@ -93,7 +93,7 @@
     },
     "elaborate-spec": {
       "description": "Refine a rough idea into a clear, detailed specification through dialogue. Does not produce code. Use when user has a vague idea, wants to think through a feature before planning, or needs to turn \"I want X\" into a concrete spec.",
-      "sha256": "6d60cffd5139347e",
+      "sha256": "9edc17057449444d",
       "path": "elaborate-spec/SKILL.md"
     },
     "enforce-first": {
@@ -103,7 +103,7 @@
     },
     "evolve-skill": {
       "description": "Benchmark-gated skill evolution — consume bigpowers-benchmark report, propose plan-work change, edit skill via craft-skill, re-run benchmark, record ADR. Use when a skill underperforms on benchmark or stocktake finds systemic gap.",
-      "sha256": "3582333ff9b15942",
+      "sha256": "e2d127c4ae0b5af7",
       "path": "evolve-skill/SKILL.md"
     },
     "execute-plan": {
@@ -148,7 +148,7 @@
     },
     "kickoff-branch": {
       "description": "Create a git worktree and feature branch, then verify a clean test baseline before any code is written. Use when starting a new feature or task, when user wants to work in isolation from main, or mentions \"start a branch\" or \"new worktree\".",
-      "sha256": "77864a4086f91f60",
+      "sha256": "d3c74c05a8d790c0",
       "path": "kickoff-branch/SKILL.md"
     },
     "map-codebase": {
@@ -198,7 +198,7 @@
     },
     "quick-fix": {
       "description": "\"Streamlined fast-path for trivial data-only fixes — no TDD, no branching ceremony. Collapses 6 skills into 2 for changes that are purely data with no logic risk. Aborts with fallback to investigate-bug if guardrails trigger.\"",
-      "sha256": "6b83f22481ff4995",
+      "sha256": "2b9f1cd6557b6256",
       "path": "quick-fix/SKILL.md"
     },
     "release-branch": {
@@ -226,6 +226,11 @@
       "sha256": "c16bbe4854a0d665",
       "path": "respond-review/SKILL.md"
     },
+    "run-benchmark": {
+      "description": "Run skill quality benchmarks from specs/benchmarks/ definitions and write pass@k reports. Use before and after evolve-skill to prove quality changes are improvements, not regressions.",
+      "sha256": "e27f4e682e505b19",
+      "path": "run-benchmark/SKILL.md"
+    },
     "run-evals": {
       "description": "Eval-Driven Development — define capability and regression evals before building; code graders use verify commands, model graders use explicit rubrics; log pass@k. Use before develop-tdd on new features, or when measuring agent capability over runs.",
       "sha256": "b3cd89a7e440c94f",
@@ -233,12 +238,12 @@
     },
     "run-planning": {
       "description": "\"DISCOVER-PHASE ADVANCER — Drive the discover-phase checklist (specs/planning-status.yaml) through survey-context → scope-work → research-first → elaborate-spec → plan-release → slice-tasks. NOT a duplicate of plan-work or the planning spine; it orchestrates the pre-coding discover phase only.\"",
-      "sha256": "eb6c9d3c0e26b7fc",
+      "sha256": "a2e7c028e7f817de",
       "path": "run-planning/SKILL.md"
     },
     "scope-work": {
       "description": "\"PLANNING SPINE STEP 1 of 3 — Scope the work: define what is in and out of scope and save as specs/product/SCOPE_LATEST.yaml. Use before slice-tasks or plan-release on any new initiative. Not a substitute for slice-tasks (step 2) or plan-work (step 3).\"",
-      "sha256": "3d333e2bfa5f9998",
+      "sha256": "d3cb167d8a5296be",
       "path": "scope-work/SKILL.md"
     },
     "search-skills": {
@@ -268,7 +273,7 @@
     },
     "slice-tasks": {
       "description": "\"PLANNING SPINE STEP 2 of 3 — Slice the work: break a scoped PRD into vertical-slice stories in specs/epics/. Use after scope-work (step 1), before plan-work (step 3). Not a substitute for scope-work or plan-work.\"",
-      "sha256": "bda9db54dbe791b5",
+      "sha256": "7948164e218541ea",
       "path": "slice-tasks/SKILL.md"
     },
     "smoke-test": {
@@ -283,7 +288,7 @@
     },
     "stocktake-skills": {
       "description": "Sequential subagent batch audit of the bigpowers skill catalog — Quick Scan (changed only) or Full (all skills). Use during sustain phase, before a major release, or when catalog drift is suspected.",
-      "sha256": "c58bf4f70ff02cd3",
+      "sha256": "6e73b2d2cf0cfbe1",
       "path": "stocktake-skills/SKILL.md"
     },
     "survey-context": {
@@ -318,7 +323,7 @@
     },
     "verify-work": {
       "description": "Multi-phase UAT gate — cold-start smoke, build, typecheck, lint, tests, step-by-step manual verification, gaps-closure loop. Use after execute-plan or develop-tdd, before audit-code.",
-      "sha256": "d1aeab669a9c5621",
+      "sha256": "44f96eac8380c15b",
       "path": "verify-work/SKILL.md"
     },
     "visual-dashboard": {

package/slice-tasks/SKILL.md CHANGED Viewed

@@ -18,6 +18,12 @@ Produce **epic capsule story tasks** in `specs/epics/eNN-slug/` — vertical sli
 ## Process
+0. **Read planning-context.yaml** — If `specs/planning-context.yaml` exists, read it first:
+   ```bash
+   test -f specs/planning-context.yaml && echo "Context found" || echo "No context — starting fresh"
+   ```
+   Use `feature_name`, `constraints`, and `out_of_scope` to inform slice boundaries. `key_decisions` in the file may constrain how stories are cut (e.g., "no external deps" constrains slice 2). If absent, proceed normally.
 1. **Read context** — Read `specs/product/SCOPE_LATEST.yaml` and/or `specs/release-plan.yaml`. Understand what the epic delivers end-to-end.
 2. **Cut tracer-bullet slices** — Identify the thinnest possible vertical path through the stack that delivers user value. Start with this slice; it will catch integration issues first. For example:

package/stocktake-skills/SKILL.md CHANGED Viewed

@@ -15,7 +15,8 @@ Audit SKILL.md catalog for drift, stale triggers, missing HARD GATEs, and INDEX
 | Mode | Scope |
 |------|-------|
 | **Quick Scan** | Skills changed since last tag or in current diff |
-| **Full** | All 62 skills per SKILL-INDEX.md + catalog audit |
+| **Full** | All skills per SKILL-INDEX.md + catalog audit |
+| **--verify** | Run `bash scripts/run-skill-verify.sh` and append health results to the stocktake report |
 ## Process
@@ -27,6 +28,7 @@ Audit SKILL.md catalog for drift, stale triggers, missing HARD GATEs, and INDEX
    - Skills with zero calls (potential dead weight)
    - Skills with high average time (candidates for `evolve-skill`)
 5. Critical findings → `plan-work` story; cosmetic → `evolve-skill` candidate.
+6. **--verify mode:** Run `bash scripts/run-skill-verify.sh` and append a `## Verify Health` section to the stocktake report: `"N/TOTAL PASS, M FAIL, K SKIP"` (where TOTAL is the number of skills checked). FAIL skills are critical findings and go straight to `plan-work`.
 ### Skill timing data (`metrics.skill_timings`)

package/verify-work/SKILL.md CHANGED Viewed

@@ -16,6 +16,7 @@ Review answers "is the code good?"; Verify answers "does the built thing do what
 - Default: full UAT plus gaps loop
 - --smoke: Cold-start only plus one happy-path flow. Use for hotfixes.
+- --cli: CLI tool verification — replaces cold-start with binary smoke checklist. Use for CLI tools with no server process.
 ## Process
@@ -24,6 +25,12 @@ Review answers "is the code good?"; Verify answers "does the built thing do what
 0. **Branch check** — must not be `main`/`master`.
 1. Read active story tasks from `specs/epics/<capsule>/eNNsYY-tasks.yaml` and story spec from `specs/epics/<capsule>/eNNsYY-<slug>.md` (countable-story-format, Gherkin in §17).
+1a. **Pre-UAT verify validation** — for each task's `verify:` command, run it and detect pattern mismatches before UAT begins. If a grep/awk/jq command fails, check whether the pattern is wrong vs. a genuine failure:
+    ```bash
+    # For a failing grep -q 'PATTERN' FILE, check what is actually in FILE
+    grep 'PATTERN' FILE || grep -n '' FILE | head -20   # no match: show the first 20 numbered lines for context
+    ```
+    Report: `"Pattern 'X' not found. Nearest match: 'Y' at line N"` and ask `"Update verify command? [Y/n]"`. Fix before proceeding — a mismatched verify command produces false failures during UAT.
 2. **Cold-start smoke** (if app): stop server, clear caches, boot from scratch.
 3. **AGENTS.md preflight** — before running default checks, call `bash scripts/bp-read-agents.sh` to detect project-specific commands. If `BP_PREFLIGHT` is set, run it instead of the default mechanical gates (or in addition to them if the project requires both). Output: `"Using preflight from AGENTS.md: <cmd>"`. Fall back to `CLAUDE.md` commands if AGENTS.md is absent.
 4. Mechanical gates: build → typecheck → lint → tests (from `CLAUDE.md` or AGENTS.md).
@@ -91,6 +98,29 @@ phases:
 > **HARD GATE** — Verification evidence MUST be persisted before marking the story done. No evidence = not verified.
+## --cli mode
+Use this mode for CLI tools, where cold-start smoke (stop server / clear caches) does not apply. It is auto-detected when the project has no server process (no `listen()`, no `server.js`, no blocking `main()`), or can be activated explicitly with `--cli`.
+**Auto-detect binary name:**
+```bash
+# Cargo.toml
+BINARY=$(grep '^name' Cargo.toml | head -1 | awk -F'"' '{print $2}')
+# package.json
+BINARY=$(node -e "console.log(require('./package.json').bin && Object.keys(require('./package.json').bin)[0] || '')" 2>/dev/null)
+# Makefile
+BINARY=$(grep '^BIN\s*=' Makefile 2>/dev/null | awk '{print $3}')
+```
+**CLI verification checklist (replaces cold-start smoke):**
+1. `--help` smoke: `$BINARY --help` → assert output contains "Usage"
+2. `--version` check: `$BINARY --version` → assert version matches manifest (Cargo.toml / package.json)
+3. Happy-path: run documented example command from README.md → assert non-empty output
+4. Edge case: `$BINARY --invalid-flag` → assert exit code ≠ 0 and error message printed
+No "stop server" or "clear caches" steps are executed in `--cli` mode. Steps 3–6 of the default process (AGENTS.md preflight, mechanical gates, UAT, gaps loop) still run unchanged.
 ## Verify
 → verify: `test -f specs/verifications/<story_id>-verify.yaml && echo "Evidence persisted"`