npm - @wazir-dev/cli - Versions diffs - 1.3.0 → 1.4.0 - Mend

@wazir-dev/cli 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (133) hide show

package/CHANGELOG.md +17 -2
package/docs/research/2026-03-20-agents/a18fb002157904af5.txt +187 -0
package/docs/research/2026-03-20-agents/a1d0ac79ac2f11e6f.txt +2 -0
package/docs/research/2026-03-20-agents/a324079de037abd7c.txt +198 -0
package/docs/research/2026-03-20-agents/a357586bccfafb0e5.txt +256 -0
package/docs/research/2026-03-20-agents/a4365394e4d753105.txt +137 -0
package/docs/research/2026-03-20-agents/a492af28bc52d3613.txt +136 -0
package/docs/research/2026-03-20-agents/a4984db0b6a8eee07.txt +124 -0
package/docs/research/2026-03-20-agents/a5b30e59d34bbb062.txt +214 -0
package/docs/research/2026-03-20-agents/a5cf7829dab911586.txt +165 -0
package/docs/research/2026-03-20-agents/a607157c30dd97c9e.txt +96 -0
package/docs/research/2026-03-20-agents/a60b68b1e19d1e16b.txt +115 -0
package/docs/research/2026-03-20-agents/a722af01c5594aba0.txt +166 -0
package/docs/research/2026-03-20-agents/a787bdc516faa5829.txt +181 -0
package/docs/research/2026-03-20-agents/a7c46d1bba1056ed2.txt +132 -0
package/docs/research/2026-03-20-agents/a7e5abbab2b281a0d.txt +100 -0
package/docs/research/2026-03-20-agents/a8dbadc66cd0d7d5a.txt +95 -0
package/docs/research/2026-03-20-agents/a904d9f45d6b86a6d.txt +75 -0
package/docs/research/2026-03-20-agents/a927659a942ee7f60.txt +102 -0
package/docs/research/2026-03-20-agents/a962cb569191f7583.txt +125 -0
package/docs/research/2026-03-20-agents/aab6decea538aac41.txt +148 -0
package/docs/research/2026-03-20-agents/abd58b853dd938a1b.txt +295 -0
package/docs/research/2026-03-20-agents/ac009da573eff7f65.txt +100 -0
package/docs/research/2026-03-20-agents/ac1bc783364405e5f.txt +190 -0
package/docs/research/2026-03-20-agents/aca5e2b57fde152a0.txt +132 -0
package/docs/research/2026-03-20-agents/ad849b8c0a7e95b8b.txt +176 -0
package/docs/research/2026-03-20-agents/adc2b12a4da32c962.txt +258 -0
package/docs/research/2026-03-20-agents/af97caaaa9a80e4cb.txt +146 -0
package/docs/research/2026-03-20-agents/afc5faceee368b3ca.txt +111 -0
package/docs/research/2026-03-20-agents/afdb282d866e3c1e4.txt +164 -0
package/docs/research/2026-03-20-agents/afe9d1f61c02b1e8d.txt +299 -0
package/docs/research/2026-03-20-agents/b4hmkwril.txt +1856 -0
package/docs/research/2026-03-20-agents/b80ptk89g.txt +1856 -0
package/docs/research/2026-03-20-agents/bf54s1jss.txt +1150 -0
package/docs/research/2026-03-20-agents/bhd6kq2kx.txt +1856 -0
package/docs/research/2026-03-20-agents/bmb2fodyr.txt +988 -0
package/docs/research/2026-03-20-agents/bmmsrij8i.txt +826 -0
package/docs/research/2026-03-20-agents/bn4t2ywpu.txt +2175 -0
package/docs/research/2026-03-20-agents/bu22t9f1z.txt +0 -0
package/docs/research/2026-03-20-agents/bwvl98v2p.txt +738 -0
package/docs/research/2026-03-20-agents/psych-a3697a7fd06eb64fd.txt +135 -0
package/docs/research/2026-03-20-agents/psych-a37776fabc870feae.txt +123 -0
package/docs/research/2026-03-20-agents/psych-a5b1fe05c0589efaf.txt +2 -0
package/docs/research/2026-03-20-agents/psych-a95c15b1f29424435.txt +76 -0
package/docs/research/2026-03-20-agents/psych-a9c26f4d9172dde7c.txt +2 -0
package/docs/research/2026-03-20-agents/psych-aa19c69f0ca2c5ad3.txt +2 -0
package/docs/research/2026-03-20-agents/psych-aa4e4cb70e1be5ecb.txt +95 -0
package/docs/research/2026-03-20-agents/psych-ab5b302f26a554663.txt +102 -0
package/docs/research/2026-03-20-deep-research-complete.md +101 -0
package/docs/research/2026-03-20-deep-research-status.md +38 -0
package/docs/research/2026-03-20-enforcement-research.md +107 -0
package/expertise/composition-map.yaml +27 -8
package/expertise/digests/reviewer/ai-coding-digest.md +83 -0
package/expertise/digests/reviewer/architectural-thinking-digest.md +63 -0
package/expertise/digests/reviewer/architecture-antipatterns-digest.md +49 -0
package/expertise/digests/reviewer/code-smells-digest.md +53 -0
package/expertise/digests/reviewer/coupling-cohesion-digest.md +54 -0
package/expertise/digests/reviewer/ddd-digest.md +60 -0
package/expertise/digests/reviewer/dependency-risk-digest.md +40 -0
package/expertise/digests/reviewer/error-handling-digest.md +55 -0
package/expertise/digests/reviewer/review-methodology-digest.md +49 -0
package/exports/hosts/claude/.claude/commands/learn.md +61 -8
package/exports/hosts/claude/.claude/settings.json +7 -6
package/exports/hosts/claude/export.manifest.json +6 -3
package/exports/hosts/claude/host-package.json +3 -0
package/exports/hosts/codex/export.manifest.json +6 -3
package/exports/hosts/codex/host-package.json +3 -0
package/exports/hosts/cursor/.cursor/hooks.json +6 -6
package/exports/hosts/cursor/export.manifest.json +6 -3
package/exports/hosts/cursor/host-package.json +3 -0
package/exports/hosts/gemini/export.manifest.json +6 -3
package/exports/hosts/gemini/host-package.json +3 -0
package/hooks/definitions/pretooluse_dispatcher.yaml +26 -0
package/hooks/definitions/pretooluse_pipeline_guard.yaml +22 -0
package/hooks/definitions/stop_pipeline_gate.yaml +22 -0
package/hooks/hooks.json +7 -6
package/hooks/pretooluse-dispatcher +84 -0
package/hooks/pretooluse-pipeline-guard +9 -0
package/hooks/stop-pipeline-gate +9 -0
package/package.json +2 -2
package/schemas/decision.schema.json +15 -0
package/schemas/hook.schema.json +4 -1
package/skills/TEMPLATE-3-ZONE.md +160 -0
package/skills/brainstorming/SKILL.md +127 -23
package/skills/clarifier/SKILL.md +175 -18
package/skills/claude-cli/SKILL.md +91 -12
package/skills/codex-cli/SKILL.md +91 -12
package/skills/debugging/SKILL.md +133 -38
package/skills/design/SKILL.md +173 -37
package/skills/dispatching-parallel-agents/SKILL.md +129 -31
package/skills/executing-plans/SKILL.md +113 -25
package/skills/executor/SKILL.md +185 -21
package/skills/finishing-a-development-branch/SKILL.md +107 -18
package/skills/gemini-cli/SKILL.md +91 -12
package/skills/humanize/SKILL.md +92 -13
package/skills/init-pipeline/SKILL.md +90 -17
package/skills/prepare-next/SKILL.md +93 -24
package/skills/receiving-code-review/SKILL.md +90 -16
package/skills/requesting-code-review/SKILL.md +100 -24
package/skills/requesting-code-review/code-reviewer.md +29 -17
package/skills/reviewer/SKILL.md +190 -50
package/skills/run-audit/SKILL.md +92 -15
package/skills/scan-project/SKILL.md +93 -14
package/skills/self-audit/SKILL.md +113 -39
package/skills/skill-research/SKILL.md +94 -7
package/skills/subagent-driven-development/SKILL.md +129 -30
package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +30 -2
package/skills/subagent-driven-development/implementer-prompt.md +40 -27
package/skills/subagent-driven-development/spec-reviewer-prompt.md +25 -12
package/skills/tdd/SKILL.md +125 -20
package/skills/using-git-worktrees/SKILL.md +118 -28
package/skills/using-skills/SKILL.md +116 -29
package/skills/verification/SKILL.md +127 -22
package/skills/wazir/SKILL.md +517 -153
package/skills/writing-plans/SKILL.md +134 -28
package/skills/writing-skills/SKILL.md +91 -13
package/skills/writing-skills/anthropic-best-practices.md +104 -64
package/skills/writing-skills/persuasion-principles.md +100 -34
package/tooling/src/capture/command.js +29 -1
package/tooling/src/capture/decision.js +40 -0
package/tooling/src/capture/store.js +1 -0
package/tooling/src/config/depth-table.js +60 -0
package/tooling/src/export/compiler.js +7 -8
package/tooling/src/guards/guardrail-functions.js +131 -0
package/tooling/src/guards/phase-prerequisite-guard.js +39 -3
package/tooling/src/hooks/pretooluse-dispatcher.js +300 -0
package/tooling/src/hooks/pretooluse-pipeline-guard.js +141 -0
package/tooling/src/hooks/stop-pipeline-gate.js +92 -0
package/tooling/src/learn/pipeline.js +177 -0
package/tooling/src/state/db.js +251 -2
package/tooling/src/state/pipeline-state.js +262 -0
package/wazir.manifest.yaml +3 -0
package/workflows/learn.md +61 -8

package/skills/writing-skills/persuasion-principles.md CHANGED Viewed

@@ -2,49 +2,115 @@
 ## Overview
-LLMs respond to the same persuasion principles as humans. Understanding this psychology helps you design more effective skills - not to manipulate, but to ensure critical practices are followed even under pressure.
+LLMs exhibit statistical compliance biases that can be leveraged to improve instruction following. This is not psychology applied to machines — it is empirical prompt engineering grounded in attention mechanics, training distribution effects, and measured compliance rates.
-**Research foundation:** Meincke et al. (2025) tested 7 persuasion principles with N=28,000 AI conversations. Persuasion techniques more than doubled compliance rates (33% -> 72%, p < .001).
+**Research foundation:** Meincke et al. (2025) tested 7 persuasion principles with N=28,000 AI conversations. Commitment priming approached 100% compliance. Positive directive framing consistently outperformed negative framing. Authority framing lifted compliance by ~40pp.
-## The Seven Principles
+## Principles Ranked by Evidence Strength
-### 1. Authority
-- Imperative language: "YOU MUST", "Never", "Always"
-- Non-negotiable framing: "No exceptions"
-- Eliminates decision fatigue and rationalization
+### Tier 1: Strong Evidence, Large Effect
-### 2. Commitment
-- Require announcements: "Announce skill usage"
-- Force explicit choices: "Choose A, B, or C"
-- Use tracking: TodoWrite for checklists
+**1. Commitment Priming (highest impact)**
+- Have the model announce its plan before executing
+- Autoregressive consistency: once the model generates "I will do X", it is statistically more likely to do X
+- Implementation: "Before executing, state which steps you will perform"
+- Measured: near-100% compliance after self-commitment in Meincke et al.
-### 3. Scarcity
-- Time-bound requirements: "Before proceeding"
-- Sequential dependencies: "Immediately after X"
-- Prevents procrastination
+**2. Positive Directive Framing**
+- "Always do X" consistently outperforms "Never do Y"
+- Token generation selects what to produce, not what to avoid
+- Negative instructions ("do NOT mention X") can paradoxically increase mentions
+- Use negative framing ONLY for critical guardrails with a positive alternative: "Do NOT skip review. Instead, run review quickly."
-### 4. Social Proof
-- Universal patterns: "Every time", "Always"
-- Failure modes: "X without Y = failure"
-- Establishes norms
+**3. Structural Isolation (XML Tags)**
+- Claude is fine-tuned to attend to XML tag boundaries
+- Tags create attention-weight spikes and trust boundaries
+- Use `<rules>`, `<instructions>`, `<output_format>` for hard boundaries
+- Hybrid XML+markdown is optimal: XML for structure, markdown for formatting within sections
-### 5. Unity
-- Collaborative language: "our codebase", "we're colleagues"
-- Shared goals: "we both want quality"
+**4. Positional Privilege (Primacy + Recency)**
+- First ~500 tokens: ~95% compliance (primacy zone)
+- Last ~500 tokens: ~85% compliance (recency zone)
+- Middle of long context: ~65-75% compliance (lost in the middle)
+- Critical rules go at beginning AND end. Never only in the middle.
-### 6. Reciprocity
-- Obligation to return favors
-- "I'll give you full context, you give me honest assessment"
+### Tier 2: Strong Evidence, Moderate Effect
-### 7. Liking
-- Avoid in discipline-enforcing skills
-- Use sparingly for collaborative skills
+**5. Authority / Role Assignment**
+- "You are a senior security auditor responsible for..." activates domain-specific patterns
+- +40pp lift in Meincke et al.
+- Expert personas produce more accurate, more disciplined output
+**6. Consequence Framing**
+- "Skipping this step causes silent regressions that waste hours of debugging"
+- Provides reasoning context for why compliance matters
+- More effective than abstract rules ("always follow the process")
+**7. Implementation Intentions (IF-THEN rules)**
+- "IF user says skip → THEN say 'Running it quickly' and execute"
+- Pre-decides the response — no judgment call needed at runtime
+- d=0.65 across 94 independent tests in human psychology studies (Gollwitzer & Sheeran, 2006 meta-analysis). Maps directly to LLM prompt design.
+- Single most actionable technique for skill authors
+**8. Redundant Reinforcement**
+- State the rule, show an example, reference it in the output format, add a constraint tag
+- Multiple encoding paths survive when any single one fails
+- Paraphrased repetition (2-3x) outperforms verbatim repetition
+### Tier 3: Context-Dependent Effect
+**9. Social Proof**
+- "Standard practice is..." or "All production systems follow this pattern"
+- Effective when baseline compliance is already moderate (+6pp)
+**10. Urgency / Scarcity**
+- "This must be done correctly the first time; there is no retry"
+- Increases both compliance and output variance — use sparingly
+**11. Moral / Ethical Framing**
+- "Omitting this would produce misleading output"
+- Effective for Claude specifically due to Constitutional AI training
+- Frame positively (good outcome of compliance) not negatively
+## Anti-Patterns
+| Pattern | Problem |
+|---------|---------|
+| Negative instructions without alternatives | "Don't do X" fails — model must activate X to evaluate constraint |
+| Instruction overload (>12 constraints) | Steep compliance drop after ~12 accumulated constraints |
+| Threats without specifics | "You will be punished" increases variance without improving median |
+| Reciprocity framing | "I helped you, now help me" — weakest principle, only +11pp |
+| Relying solely on alignment | 80% of enterprises reported injection incidents. Structural defenses needed. |
 ## Principle Combinations by Skill Type
-| Skill Type | Use | Avoid |
-|------------|-----|-------|
-| Discipline-enforcing | Authority + Commitment + Social Proof | Liking, Reciprocity |
-| Guidance/technique | Moderate Authority + Unity | Heavy authority |
-| Collaborative | Unity + Commitment | Authority, Liking |
-| Reference | Clarity only | All persuasion |
+| Skill Type | Primary Techniques | Avoid |
+|------------|-------------------|-------|
+| Discipline-enforcing (TDD, verification) | Commitment + Implementation Intentions + Positional Privilege + Authority | Liking, Reciprocity |
+| Process-governing (clarifier, executor) | Commitment + Consequence Framing + Structural Isolation | Heavy emotional framing |
+| Collaborative (brainstorming, design) | Moderate Authority + Implementation Intentions | Over-constraining creative steps |
+| Reference (docs, guides) | Structural Isolation + Positional Privilege | All persuasion — clarity only |
+## The 3-Zone Architecture
+Apply these principles through the 3-zone skill layout:
+- **Zone 1 (Primacy):** Identity + Iron Laws + Priority Stack — leverages positional privilege + authority + commitment
+- **Zone 2 (Process):** IF-THEN rules + decision tables + gate functions — leverages implementation intentions + structural isolation
+- **Zone 3 (Recency):** Restated laws + Red Flags + meta-instruction — leverages recency + redundant reinforcement + consequence framing
+## Temporal Testing Advisory
+Prompt engineering techniques lose effectiveness as models improve. Re-test skill compliance every major model version. Include a "last verified" date on persuasion-dependent skills.
+**Last verified:** Claude Opus 4.6, March 2026
+## Sources
+- Meincke et al. (2025). "Call Me A Jerk: Persuading AI to Comply with Objectionable Requests" (N=28,000, SSRN)
+- Liu et al. (2024). "Lost in the Middle" (TACL, arXiv:2307.03172)
+- Wallace et al. (2024). "The Instruction Hierarchy" (OpenAI, arXiv:2404.13208)
+- Gollwitzer (1999). "Implementation Intentions: Strong Effects of Simple Plans"; Gollwitzer & Sheeran (2006) meta-analysis (d=0.65, 94 studies)
+- Li et al. (2023). "Large Language Models Understand and Can Be Enhanced by Emotional Stimuli" (EmotionPrompt; emotional framing effects, arXiv:2307.11760)
+- Zhou et al. (2023). IFEval benchmark (arXiv:2311.07911)
+- Anthropic (2024). Claude Model Spec — instruction hierarchy documentation

package/tooling/src/capture/command.js CHANGED Viewed

@@ -19,6 +19,7 @@ import {
 } from './store.js';
 import { readRunConfig, getPhaseLoopCap } from './run-config.js';
 import { readUsage, generateReport, initUsage, recordCaptureSavings, recordPhaseUsage } from './usage.js';
+import { appendDecision } from './decision.js';
 import { evaluateLoopCapGuard } from '../guards/loop-cap-guard.js';
 import { evaluatePhasePrerequisiteGuard } from '../guards/phase-prerequisite-guard.js';
@@ -73,6 +74,8 @@ function resolveCaptureContext(parsed, context = {}) {
       'command',
       'exit-code',
       'task-id',
+      'decision',
+      'reason',
     ],
   });
   const stateRoot = resolveStateRoot(projectRoot, manifest, {
@@ -388,6 +391,29 @@ function handleUsage(parsed, context = {}) {
   };
 }
+function handleDecision(parsed, context = {}) {
+  const { stateRoot, options } = resolveCaptureContext(parsed, context);
+  requireOption(options, 'run', 'Usage: wazir capture decision --run <id> --phase <phase> --decision "<text>" --reason "<text>" [--task-id <id>] [--state-root <path>] [--json]');
+  requireOption(options, 'phase', 'Usage: wazir capture decision --run <id> --phase <phase> --decision "<text>" --reason "<text>" [--task-id <id>] [--state-root <path>] [--json]');
+  requireOption(options, 'decision', 'Usage: wazir capture decision --run <id> --phase <phase> --decision "<text>" --reason "<text>" [--task-id <id>] [--state-root <path>] [--json]');
+  requireOption(options, 'reason', 'Usage: wazir capture decision --run <id> --phase <phase> --decision "<text>" --reason "<text>" [--task-id <id>] [--state-root <path>] [--json]');
+  const runPaths = getRunPaths(stateRoot, options.run);
+  appendDecision(runPaths, {
+    phase: options.phase,
+    decision: options.decision,
+    reason: options.reason,
+    task_id: options.taskId,
+  });
+  return formatResult({
+    run_id: options.run,
+    event: 'decision',
+    decisions_path: runPaths.decisionsPath,
+  }, { json: options.json });
+}
 function handleLoopCheck(parsed, context = {}) {
   const { stateRoot, options } = resolveCaptureContext(parsed, context);
@@ -486,10 +512,12 @@ export function runCaptureCommand(parsed, context = {}) {
         return handleUsage(parsed, context);
       case 'loop-check':
         return handleLoopCheck(parsed, context);
+      case 'decision':
+        return handleDecision(parsed, context);
       default:
         return {
           exitCode: 1,
-          stderr: 'Usage: wazir capture <init|event|route|output|summary|usage|loop-check> ...\n',
+          stderr: 'Usage: wazir capture <init|event|route|output|summary|usage|loop-check|decision> ...\n',
         };
     }
   } catch (error) {

package/tooling/src/capture/decision.js ADDED Viewed

@@ -0,0 +1,40 @@
+import fs from 'node:fs';
+/**
+ * Append a decision entry to the run's NDJSON log.
+ *
+ * @param {object} runPaths - Run paths object (must include decisionsPath)
+ * @param {object} entry - { phase, decision, reason, task_id? }
+ */
+export function appendDecision(runPaths, { phase, decision, reason, task_id }) {
+  const record = {
+    timestamp: new Date().toISOString(),
+    phase: phase ?? 'unknown',
+    decision: decision ?? '',
+    reason: reason ?? '',
+  };
+  if (task_id) {
+    record.task_id = task_id;
+  }
+  fs.appendFileSync(runPaths.decisionsPath, JSON.stringify(record) + '\n');
+  return runPaths.decisionsPath;
+}
+/**
+ * Read all entries from a run's decisions log.
+ *
+ * @param {object} runPaths - Run paths object (must include decisionsPath)
+ * @returns {Array<object>}
+ */
+export function readDecisions(runPaths) {
+  if (!fs.existsSync(runPaths.decisionsPath)) return [];
+  return fs.readFileSync(runPaths.decisionsPath, 'utf8')
+    .split('\n')
+    .filter(line => line.trim())
+    .map(line => {
+      try { return JSON.parse(line); }
+      catch { return null; }
+    })
+    .filter(Boolean);
+}

package/tooling/src/capture/store.js CHANGED Viewed

@@ -19,6 +19,7 @@ export function getRunPaths(stateRoot, runId) {
     capturesDir,
     statusPath: path.join(runRoot, 'status.json'),
     eventsPath: path.join(runRoot, 'events.ndjson'),
+    decisionsPath: path.join(runRoot, 'decisions.ndjson'),
     summaryPath: path.join(runRoot, 'summary.md'),
     usagePath: path.join(runRoot, 'usage.json'),
   };

package/tooling/src/config/depth-table.js ADDED Viewed

@@ -0,0 +1,60 @@
+/**
+ * Canonical depth parameter table.
+ *
+ * Single source of truth for all depth-dependent behavior across the pipeline.
+ * Skills reference these values conceptually; hooks and tooling import directly.
+ *
+ * DEPTH_LEVELS holds the accepted level names. DEPTH_TABLE maps each of those
+ * names to an object carrying the same eight parameter keys; the numeric pass
+ * counts and loop_cap grow from quick to deep, while heartbeat_max_silence_s
+ * shrinks.
+ *
+ * NOTE(review): the `_s` suffix on heartbeat_max_silence_s presumably means
+ * seconds — confirm against the consumer.
+ */
+export const DEPTH_LEVELS = new Set(['quick', 'standard', 'deep']);
+export const DEPTH_TABLE = {
+  quick: {
+    review_passes: 3,
+    loop_cap: 5,
+    heartbeat_max_silence_s: 180,
+    research_intensity: 'minimal',
+    challenge_intensity: 'surface',
+    spec_hardening_passes: 1,
+    design_review_passes: 1,
+    time_estimate_label: '~15-30 min',
+  },
+  standard: {
+    review_passes: 5,
+    loop_cap: 10,
+    heartbeat_max_silence_s: 120,
+    research_intensity: 'balanced',
+    challenge_intensity: 'balanced',
+    spec_hardening_passes: 3,
+    design_review_passes: 3,
+    time_estimate_label: '~45-90 min',
+  },
+  deep: {
+    review_passes: 7,
+    loop_cap: 15,
+    heartbeat_max_silence_s: 90,
+    research_intensity: 'thorough',
+    challenge_intensity: 'adversarial',
+    spec_hardening_passes: 5,
+    design_review_passes: 5,
+    time_estimate_label: '~2-3 hrs',
+  },
+};
+/**
+ * Get a specific depth parameter value.
+ *
+ * @param {string} depth  — 'quick' | 'standard' | 'deep' (defaults to 'standard')
+ * @param {string} param  — parameter name from the depth table
+ * @returns {*} the parameter value
+ */
+export function getDepthParam(depth, param) {
+  const level = depth ?? 'standard';
+  if (!DEPTH_LEVELS.has(level)) {
+    throw new Error(`Unknown depth level: "${level}". Valid levels: ${[...DEPTH_LEVELS].join(', ')}`);
+  }
+  const entry = DEPTH_TABLE[level];
+  if (!(param in entry)) {
+    throw new Error(`Unknown depth parameter: "${param}". Valid params: ${Object.keys(entry).join(', ')}`);
+  }
+  return entry[param];
+}

package/tooling/src/export/compiler.js CHANGED Viewed

@@ -91,8 +91,7 @@ function renderCommonInstructions(host, manifest) {
 const DEFAULT_CLAUDE_HOOKS = {
   hooks: {
     PreToolUse: [
-      { matcher: 'Write|Edit', hooks: [{ type: 'command', command: './hooks/protected-path-write-guard' }] },
-      { matcher: 'Bash', hooks: [{ type: 'command', command: './hooks/context-mode-router' }] },
+      { matcher: 'Write|Edit|Bash', hooks: [{ type: 'command', command: './hooks/pretooluse-dispatcher' }] },
     ],
     SessionStart: [
       { hooks: [{ type: 'command', command: './hooks/loop-cap-guard' }] },
@@ -115,21 +114,21 @@ function renderCursorHooks() {
   return JSON.stringify({
     hooks: [
       {
-        name: 'protected-path-write-guard',
-        command: './hooks/protected-path-write-guard',
+        name: 'pretooluse-dispatcher',
+        command: './hooks/pretooluse-dispatcher',
       },
       {
         name: 'loop-cap-guard',
         command: './hooks/loop-cap-guard',
       },
-      {
-        name: 'context-mode-router',
-        command: './hooks/context-mode-router',
-      },
       {
         name: 'session-start',
         command: './hooks/session-start',
       },
+      {
+        name: 'stop-pipeline-gate',
+        command: './hooks/stop-pipeline-gate',
+      },
     ],
   }, null, 2);
 }

package/tooling/src/guards/guardrail-functions.js ADDED Viewed

@@ -0,0 +1,131 @@
+import fs from 'node:fs';
+import path from 'node:path';
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+function fileExistsAndNonEmpty(filePath) {
+  if (!fs.existsSync(filePath)) return false;
+  const stat = fs.statSync(filePath);
+  return stat.size > 0;
+}
+function result(passed, reason, missing = []) { // builds the { passed, reason, missing? } object the validators return
+  return { passed, reason, ...(missing.length > 0 ? { missing } : {}) }; // `missing` key is present only when the list is non-empty
+}
+// ---------------------------------------------------------------------------
+// Per-phase validators
+// ---------------------------------------------------------------------------
+const CLARIFY_ARTIFACTS = [
+  'clarified/clarification.md',
+  'clarified/spec-hardened.md',
+  'clarified/design.md',
+  'clarified/execution-plan.md',
+];
+/**
+ * Validates clarify phase produced all required artifacts.
+ */
+export function validateClarifyComplete(_state, runDir) {
+  const missing = [];
+  for (const relPath of CLARIFY_ARTIFACTS) {
+    const full = path.join(runDir, relPath);
+    if (!fileExistsAndNonEmpty(full)) {
+      missing.push(relPath);
+    }
+  }
+  if (missing.length > 0) {
+    return result(false, `Missing clarify artifacts: ${missing.join(', ')}`, missing);
+  }
+  return result(true, 'All clarify artifacts present and non-empty.');
+}
+/**
+ * Validates execute phase: at least one task artifact dir and verification proof.
+ */
+export function validateExecuteComplete(_state, runDir) {
+  const missing = [];
+  const artifactsDir = path.join(runDir, 'artifacts');
+  // Check for at least one task-NNN directory with content
+  const taskDirs = fs.existsSync(artifactsDir)
+    ? fs.readdirSync(artifactsDir).filter(d => d.startsWith('task-') && fs.statSync(path.join(artifactsDir, d)).isDirectory())
+    : [];
+  if (taskDirs.length === 0) {
+    missing.push('artifacts/task-NNN/ (no task artifacts found)');
+  }
+  // Check verification proof
+  const proofPath = path.join(artifactsDir, 'verification-proof.md');
+  if (!fileExistsAndNonEmpty(proofPath)) {
+    missing.push('artifacts/verification-proof.md');
+  }
+  if (missing.length > 0) {
+    return result(false, `Missing execute artifacts: ${missing.join(', ')}`, missing);
+  }
+  return result(true, `Execute complete: ${taskDirs.length} task(s) + verification proof.`);
+}
+/**
+ * Validates verify phase: proof exists and has substantive content.
+ */
+export function validateVerifyComplete(_state, runDir) {
+  const proofPath = path.join(runDir, 'artifacts', 'verification-proof.md');
+  if (!fileExistsAndNonEmpty(proofPath)) {
+    return result(false, 'Verification proof missing or empty.', ['artifacts/verification-proof.md']);
+  }
+  const content = fs.readFileSync(proofPath, 'utf8');
+  if (content.trim().length < 20) {
+    return result(false, 'Verification proof exists but has insufficient content.', ['artifacts/verification-proof.md']);
+  }
+  return result(true, 'Verification proof present with evidence.');
+}
+/**
+ * Validates review phase: verdict.json with a numeric score.
+ */
+export function validateReviewComplete(_state, runDir) {
+  const verdictPath = path.join(runDir, 'reviews', 'verdict.json');
+  if (!fs.existsSync(verdictPath)) {
+    return result(false, 'Review verdict missing.', ['reviews/verdict.json']);
+  }
+  try {
+    const verdict = JSON.parse(fs.readFileSync(verdictPath, 'utf8'));
+    if (typeof verdict.score !== 'number') {
+      return result(false, 'Review verdict has no numeric score.', ['reviews/verdict.json (missing score)']);
+    }
+    return result(true, `Review complete with score ${verdict.score}.`);
+  } catch {
+    return result(false, 'Review verdict is not valid JSON.', ['reviews/verdict.json']);
+  }
+}
+// ---------------------------------------------------------------------------
+// Dispatcher
+// ---------------------------------------------------------------------------
+const VALIDATORS = {
+  clarify: validateClarifyComplete,
+  execute: validateExecuteComplete,
+  verify: validateVerifyComplete,
+  review: validateReviewComplete,
+};
+/**
+ * Run the guardrail for a given phase.
+ */
+export function runGuardrail(phase, state, runDir) {
+  const validator = VALIDATORS[phase];
+  if (!validator) {
+    throw new Error(`Unknown phase for guardrail: ${phase}`);
+  }
+  return validator(state, runDir);
+}

package/tooling/src/guards/phase-prerequisite-guard.js CHANGED Viewed

@@ -143,10 +143,42 @@ export function evaluatePhasePrerequisiteGuard(payload) {
   const requiredPhaseExits = prerequisites.required_phase_exits ?? [];
   const missingArtifacts = [];
+  const failedProofs = [];
   for (const artifact of requiredArtifacts) {
     const artifactPath = path.join(runPaths.runRoot, artifact);
     if (!fs.existsSync(artifactPath)) {
       missingArtifacts.push(artifact);
+      continue;
+    }
+    const basename = path.basename(artifact);
+    // Content validation for proof JSON files (e.g. proof-task-001.json, verification-proof.json)
+    if (basename.includes('proof') && basename.endsWith('.json')) {
+      try {
+        const content = fs.readFileSync(artifactPath, 'utf8');
+        const parsed = JSON.parse(content);
+        if (parsed.all_passed !== true) {
+          failedProofs.push(`${artifact}: all_passed is not true (got ${JSON.stringify(parsed.all_passed)})`);
+        }
+      } catch {
+        // Fail closed: malformed JSON blocks the phase
+        failedProofs.push(`${artifact}: malformed or unreadable JSON`);
+      }
+      continue;
+    }
+    // Content validation for verification-proof.md
+    if (basename === 'verification-proof.md') {
+      try {
+        const content = fs.readFileSync(artifactPath, 'utf8');
+        const lower = content.toLowerCase();
+        if (!lower.includes('status: pass') && !content.includes('PASS')) {
+          failedProofs.push(`${artifact}: does not contain "status: pass" or "PASS"`);
+        }
+      } catch {
+        failedProofs.push(`${artifact}: unreadable`);
+      }
     }
   }
@@ -158,10 +190,10 @@ export function evaluatePhasePrerequisiteGuard(payload) {
     }
   }
-  // OR-logic for resumed runs: if all artifacts exist, pass even without phase_exit events.
+  // OR-logic for resumed runs: if all artifacts exist and proofs pass, allow even without phase_exit events.
   // Artifacts are the hard evidence; phase_exits are supplementary.
-  // But if artifacts are missing, phase_exits alone are not sufficient.
-  if (missingArtifacts.length === 0) {
+  // But if artifacts are missing or proofs fail, phase_exits alone are not sufficient.
+  if (missingArtifacts.length === 0 && failedProofs.length === 0) {
     return {
       allowed: true,
       reason: `All prerequisite artifacts present for phase ${phase}.`,
@@ -172,6 +204,9 @@ export function evaluatePhasePrerequisiteGuard(payload) {
   if (missingArtifacts.length > 0) {
     reasons.push(`Missing artifacts: ${missingArtifacts.join(', ')}`);
   }
+  if (failedProofs.length > 0) {
+    reasons.push(`Failed proof validation: ${failedProofs.join('; ')}`);
+  }
   if (missingPhaseExits.length > 0) {
     reasons.push(`Missing phase exits: ${missingPhaseExits.join(', ')}`);
   }
@@ -180,6 +215,7 @@ export function evaluatePhasePrerequisiteGuard(payload) {
     allowed: false,
     reason: reasons.join('. '),
     missing_artifacts: missingArtifacts.length > 0 ? missingArtifacts : undefined,
+    failed_proofs: failedProofs.length > 0 ? failedProofs : undefined,
     missing_phase_exits: missingPhaseExits.length > 0 ? missingPhaseExits : undefined,
   };
 }