@wazir-dev/cli 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. package/CHANGELOG.md +17 -2
  2. package/docs/research/2026-03-20-agents/a18fb002157904af5.txt +187 -0
  3. package/docs/research/2026-03-20-agents/a1d0ac79ac2f11e6f.txt +2 -0
  4. package/docs/research/2026-03-20-agents/a324079de037abd7c.txt +198 -0
  5. package/docs/research/2026-03-20-agents/a357586bccfafb0e5.txt +256 -0
  6. package/docs/research/2026-03-20-agents/a4365394e4d753105.txt +137 -0
  7. package/docs/research/2026-03-20-agents/a492af28bc52d3613.txt +136 -0
  8. package/docs/research/2026-03-20-agents/a4984db0b6a8eee07.txt +124 -0
  9. package/docs/research/2026-03-20-agents/a5b30e59d34bbb062.txt +214 -0
  10. package/docs/research/2026-03-20-agents/a5cf7829dab911586.txt +165 -0
  11. package/docs/research/2026-03-20-agents/a607157c30dd97c9e.txt +96 -0
  12. package/docs/research/2026-03-20-agents/a60b68b1e19d1e16b.txt +115 -0
  13. package/docs/research/2026-03-20-agents/a722af01c5594aba0.txt +166 -0
  14. package/docs/research/2026-03-20-agents/a787bdc516faa5829.txt +181 -0
  15. package/docs/research/2026-03-20-agents/a7c46d1bba1056ed2.txt +132 -0
  16. package/docs/research/2026-03-20-agents/a7e5abbab2b281a0d.txt +100 -0
  17. package/docs/research/2026-03-20-agents/a8dbadc66cd0d7d5a.txt +95 -0
  18. package/docs/research/2026-03-20-agents/a904d9f45d6b86a6d.txt +75 -0
  19. package/docs/research/2026-03-20-agents/a927659a942ee7f60.txt +102 -0
  20. package/docs/research/2026-03-20-agents/a962cb569191f7583.txt +125 -0
  21. package/docs/research/2026-03-20-agents/aab6decea538aac41.txt +148 -0
  22. package/docs/research/2026-03-20-agents/abd58b853dd938a1b.txt +295 -0
  23. package/docs/research/2026-03-20-agents/ac009da573eff7f65.txt +100 -0
  24. package/docs/research/2026-03-20-agents/ac1bc783364405e5f.txt +190 -0
  25. package/docs/research/2026-03-20-agents/aca5e2b57fde152a0.txt +132 -0
  26. package/docs/research/2026-03-20-agents/ad849b8c0a7e95b8b.txt +176 -0
  27. package/docs/research/2026-03-20-agents/adc2b12a4da32c962.txt +258 -0
  28. package/docs/research/2026-03-20-agents/af97caaaa9a80e4cb.txt +146 -0
  29. package/docs/research/2026-03-20-agents/afc5faceee368b3ca.txt +111 -0
  30. package/docs/research/2026-03-20-agents/afdb282d866e3c1e4.txt +164 -0
  31. package/docs/research/2026-03-20-agents/afe9d1f61c02b1e8d.txt +299 -0
  32. package/docs/research/2026-03-20-agents/b4hmkwril.txt +1856 -0
  33. package/docs/research/2026-03-20-agents/b80ptk89g.txt +1856 -0
  34. package/docs/research/2026-03-20-agents/bf54s1jss.txt +1150 -0
  35. package/docs/research/2026-03-20-agents/bhd6kq2kx.txt +1856 -0
  36. package/docs/research/2026-03-20-agents/bmb2fodyr.txt +988 -0
  37. package/docs/research/2026-03-20-agents/bmmsrij8i.txt +826 -0
  38. package/docs/research/2026-03-20-agents/bn4t2ywpu.txt +2175 -0
  39. package/docs/research/2026-03-20-agents/bu22t9f1z.txt +0 -0
  40. package/docs/research/2026-03-20-agents/bwvl98v2p.txt +738 -0
  41. package/docs/research/2026-03-20-agents/psych-a3697a7fd06eb64fd.txt +135 -0
  42. package/docs/research/2026-03-20-agents/psych-a37776fabc870feae.txt +123 -0
  43. package/docs/research/2026-03-20-agents/psych-a5b1fe05c0589efaf.txt +2 -0
  44. package/docs/research/2026-03-20-agents/psych-a95c15b1f29424435.txt +76 -0
  45. package/docs/research/2026-03-20-agents/psych-a9c26f4d9172dde7c.txt +2 -0
  46. package/docs/research/2026-03-20-agents/psych-aa19c69f0ca2c5ad3.txt +2 -0
  47. package/docs/research/2026-03-20-agents/psych-aa4e4cb70e1be5ecb.txt +95 -0
  48. package/docs/research/2026-03-20-agents/psych-ab5b302f26a554663.txt +102 -0
  49. package/docs/research/2026-03-20-deep-research-complete.md +101 -0
  50. package/docs/research/2026-03-20-deep-research-status.md +38 -0
  51. package/docs/research/2026-03-20-enforcement-research.md +107 -0
  52. package/expertise/composition-map.yaml +27 -8
  53. package/expertise/digests/reviewer/ai-coding-digest.md +83 -0
  54. package/expertise/digests/reviewer/architectural-thinking-digest.md +63 -0
  55. package/expertise/digests/reviewer/architecture-antipatterns-digest.md +49 -0
  56. package/expertise/digests/reviewer/code-smells-digest.md +53 -0
  57. package/expertise/digests/reviewer/coupling-cohesion-digest.md +54 -0
  58. package/expertise/digests/reviewer/ddd-digest.md +60 -0
  59. package/expertise/digests/reviewer/dependency-risk-digest.md +40 -0
  60. package/expertise/digests/reviewer/error-handling-digest.md +55 -0
  61. package/expertise/digests/reviewer/review-methodology-digest.md +49 -0
  62. package/exports/hosts/claude/.claude/commands/learn.md +61 -8
  63. package/exports/hosts/claude/.claude/settings.json +7 -6
  64. package/exports/hosts/claude/export.manifest.json +6 -3
  65. package/exports/hosts/claude/host-package.json +3 -0
  66. package/exports/hosts/codex/export.manifest.json +6 -3
  67. package/exports/hosts/codex/host-package.json +3 -0
  68. package/exports/hosts/cursor/.cursor/hooks.json +6 -6
  69. package/exports/hosts/cursor/export.manifest.json +6 -3
  70. package/exports/hosts/cursor/host-package.json +3 -0
  71. package/exports/hosts/gemini/export.manifest.json +6 -3
  72. package/exports/hosts/gemini/host-package.json +3 -0
  73. package/hooks/definitions/pretooluse_dispatcher.yaml +26 -0
  74. package/hooks/definitions/pretooluse_pipeline_guard.yaml +22 -0
  75. package/hooks/definitions/stop_pipeline_gate.yaml +22 -0
  76. package/hooks/hooks.json +7 -6
  77. package/hooks/pretooluse-dispatcher +84 -0
  78. package/hooks/pretooluse-pipeline-guard +9 -0
  79. package/hooks/stop-pipeline-gate +9 -0
  80. package/package.json +2 -2
  81. package/schemas/decision.schema.json +15 -0
  82. package/schemas/hook.schema.json +4 -1
  83. package/skills/TEMPLATE-3-ZONE.md +160 -0
  84. package/skills/brainstorming/SKILL.md +127 -23
  85. package/skills/clarifier/SKILL.md +175 -18
  86. package/skills/claude-cli/SKILL.md +91 -12
  87. package/skills/codex-cli/SKILL.md +91 -12
  88. package/skills/debugging/SKILL.md +133 -38
  89. package/skills/design/SKILL.md +173 -37
  90. package/skills/dispatching-parallel-agents/SKILL.md +129 -31
  91. package/skills/executing-plans/SKILL.md +113 -25
  92. package/skills/executor/SKILL.md +185 -21
  93. package/skills/finishing-a-development-branch/SKILL.md +107 -18
  94. package/skills/gemini-cli/SKILL.md +91 -12
  95. package/skills/humanize/SKILL.md +92 -13
  96. package/skills/init-pipeline/SKILL.md +90 -17
  97. package/skills/prepare-next/SKILL.md +93 -24
  98. package/skills/receiving-code-review/SKILL.md +90 -16
  99. package/skills/requesting-code-review/SKILL.md +100 -24
  100. package/skills/requesting-code-review/code-reviewer.md +29 -17
  101. package/skills/reviewer/SKILL.md +190 -50
  102. package/skills/run-audit/SKILL.md +92 -15
  103. package/skills/scan-project/SKILL.md +93 -14
  104. package/skills/self-audit/SKILL.md +113 -39
  105. package/skills/skill-research/SKILL.md +94 -7
  106. package/skills/subagent-driven-development/SKILL.md +129 -30
  107. package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +30 -2
  108. package/skills/subagent-driven-development/implementer-prompt.md +40 -27
  109. package/skills/subagent-driven-development/spec-reviewer-prompt.md +25 -12
  110. package/skills/tdd/SKILL.md +125 -20
  111. package/skills/using-git-worktrees/SKILL.md +118 -28
  112. package/skills/using-skills/SKILL.md +116 -29
  113. package/skills/verification/SKILL.md +127 -22
  114. package/skills/wazir/SKILL.md +517 -153
  115. package/skills/writing-plans/SKILL.md +134 -28
  116. package/skills/writing-skills/SKILL.md +91 -13
  117. package/skills/writing-skills/anthropic-best-practices.md +104 -64
  118. package/skills/writing-skills/persuasion-principles.md +100 -34
  119. package/tooling/src/capture/command.js +29 -1
  120. package/tooling/src/capture/decision.js +40 -0
  121. package/tooling/src/capture/store.js +1 -0
  122. package/tooling/src/config/depth-table.js +60 -0
  123. package/tooling/src/export/compiler.js +7 -8
  124. package/tooling/src/guards/guardrail-functions.js +131 -0
  125. package/tooling/src/guards/phase-prerequisite-guard.js +39 -3
  126. package/tooling/src/hooks/pretooluse-dispatcher.js +300 -0
  127. package/tooling/src/hooks/pretooluse-pipeline-guard.js +141 -0
  128. package/tooling/src/hooks/stop-pipeline-gate.js +92 -0
  129. package/tooling/src/learn/pipeline.js +177 -0
  130. package/tooling/src/state/db.js +251 -2
  131. package/tooling/src/state/pipeline-state.js +262 -0
  132. package/wazir.manifest.yaml +3 -0
  133. package/workflows/learn.md +61 -8
@@ -2,49 +2,115 @@
2
2
 
3
3
  ## Overview
4
4
 
5
- LLMs respond to the same persuasion principles as humans. Understanding this psychology helps you design more effective skills - not to manipulate, but to ensure critical practices are followed even under pressure.
5
+ LLMs exhibit statistical compliance biases that can be leveraged to improve instruction following. This is not psychology applied to machines — it is empirical prompt engineering grounded in attention mechanics, training distribution effects, and measured compliance rates.
6
6
 
7
- **Research foundation:** Meincke et al. (2025) tested 7 persuasion principles with N=28,000 AI conversations. Persuasion techniques more than doubled compliance rates (33% -> 72%, p < .001).
7
+ **Research foundation:** Meincke et al. (2025) tested 7 persuasion principles with N=28,000 AI conversations. Commitment priming approached 100% compliance. Positive directive framing consistently outperformed negative framing. Authority framing lifted compliance by ~40pp.
8
8
 
9
- ## The Seven Principles
9
+ ## Principles Ranked by Evidence Strength
10
10
 
11
- ### 1. Authority
12
- - Imperative language: "YOU MUST", "Never", "Always"
13
- - Non-negotiable framing: "No exceptions"
14
- - Eliminates decision fatigue and rationalization
11
+ ### Tier 1: Strong Evidence, Large Effect
15
12
 
16
- ### 2. Commitment
17
- - Require announcements: "Announce skill usage"
18
- - Force explicit choices: "Choose A, B, or C"
19
- - Use tracking: TodoWrite for checklists
13
+ **1. Commitment Priming (highest impact)**
14
+ - Have the model announce its plan before executing
15
+ - Autoregressive consistency: once the model generates "I will do X", it is statistically more likely to do X
16
+ - Implementation: "Before executing, state which steps you will perform"
17
+ - Measured: near-100% compliance after self-commitment in Meincke et al.
20
18
 
21
- ### 3. Scarcity
22
- - Time-bound requirements: "Before proceeding"
23
- - Sequential dependencies: "Immediately after X"
24
- - Prevents procrastination
19
+ **2. Positive Directive Framing**
20
+ - "Always do X" consistently outperforms "Never do Y"
21
+ - Token generation selects what to produce, not what to avoid
22
+ - Negative instructions ("do NOT mention X") can paradoxically increase mentions
23
+ - Use negative framing ONLY for critical guardrails with a positive alternative: "Do NOT skip review. Instead, run review quickly."
25
24
 
26
- ### 4. Social Proof
27
- - Universal patterns: "Every time", "Always"
28
- - Failure modes: "X without Y = failure"
29
- - Establishes norms
25
+ **3. Structural Isolation (XML Tags)**
26
+ - Claude is fine-tuned to attend to XML tag boundaries
27
+ - Tags create attention-weight spikes and trust boundaries
28
+ - Use `<rules>`, `<instructions>`, `<output_format>` for hard boundaries
29
+ - Hybrid XML+markdown is optimal: XML for structure, markdown for formatting within sections
30
30
 
31
- ### 5. Unity
32
- - Collaborative language: "our codebase", "we're colleagues"
33
- - Shared goals: "we both want quality"
31
+ **4. Positional Privilege (Primacy + Recency)**
32
+ - First ~500 tokens: ~95% compliance (primacy zone)
33
+ - Last ~500 tokens: ~85% compliance (recency zone)
34
+ - Middle of long context: ~65-75% compliance (lost in the middle)
35
+ - Critical rules go at beginning AND end. Never only in the middle.
34
36
 
35
- ### 6. Reciprocity
36
- - Obligation to return favors
37
- - "I'll give you full context, you give me honest assessment"
37
+ ### Tier 2: Strong Evidence, Moderate Effect
38
38
 
39
- ### 7. Liking
40
- - Avoid in discipline-enforcing skills
41
- - Use sparingly for collaborative skills
39
+ **5. Authority / Role Assignment**
40
+ - "You are a senior security auditor responsible for..." activates domain-specific patterns
41
+ - +40pp lift in Meincke et al.
42
+ - Expert personas produce more accurate, more disciplined output
43
+
44
+ **6. Consequence Framing**
45
+ - "Skipping this step causes silent regressions that waste hours of debugging"
46
+ - Provides reasoning context for why compliance matters
47
+ - More effective than abstract rules ("always follow the process")
48
+
49
+ **7. Implementation Intentions (IF-THEN rules)**
50
+ - "IF user says skip → THEN say 'Running it quickly' and execute"
51
+ - Pre-decides the response — no judgment call needed at runtime
52
+ - d=0.65 across 94 psychology studies (Gollwitzer). Maps directly to LLM prompt design.
53
+ - Single most actionable technique for skill authors
54
+
55
+ **8. Redundant Reinforcement**
56
+ - State the rule, show an example, reference it in the output format, add a constraint tag
57
+ - Multiple encoding paths survive when any single one fails
58
+ - Paraphrased repetition (2-3x) outperforms verbatim repetition
59
+
60
+ ### Tier 3: Context-Dependent Effect
61
+
62
+ **9. Social Proof**
63
+ - "Standard practice is..." or "All production systems follow this pattern"
64
+ - Effective when baseline compliance is already moderate (+6pp)
65
+
66
+ **10. Urgency / Scarcity**
67
+ - "This must be done correctly the first time; there is no retry"
68
+ - Increases both compliance and output variance — use sparingly
69
+
70
+ **11. Moral / Ethical Framing**
71
+ - "Omitting this would produce misleading output"
72
+ - Effective for Claude specifically due to Constitutional AI training
73
+ - Frame positively (the good outcome of compliance), not negatively
74
+
75
+ ## Anti-Patterns
76
+
77
+ | Pattern | Problem |
78
+ |---------|---------|
79
+ | Negative instructions without alternatives | "Don't do X" fails — model must activate X to evaluate constraint |
80
+ | Instruction overload (>12 constraints) | Steep compliance drop after ~12 accumulated constraints |
81
+ | Threats without specifics | "You will be punished" increases variance without improving median |
82
+ | Reciprocity framing | "I helped you, now help me" — weakest principle, only +11pp |
83
+ | Relying solely on alignment | 80% of enterprises reported injection incidents. Structural defenses needed. |
42
84
 
43
85
  ## Principle Combinations by Skill Type
44
86
 
45
- | Skill Type | Use | Avoid |
46
- |------------|-----|-------|
47
- | Discipline-enforcing | Authority + Commitment + Social Proof | Liking, Reciprocity |
48
- | Guidance/technique | Moderate Authority + Unity | Heavy authority |
49
- | Collaborative | Unity + Commitment | Authority, Liking |
50
- | Reference | Clarity only | All persuasion |
87
+ | Skill Type | Primary Techniques | Avoid |
88
+ |------------|-------------------|-------|
89
+ | Discipline-enforcing (TDD, verification) | Commitment + Implementation Intentions + Positional Privilege + Authority | Liking, Reciprocity |
90
+ | Process-governing (clarifier, executor) | Commitment + Consequence Framing + Structural Isolation | Heavy emotional framing |
91
+ | Collaborative (brainstorming, design) | Moderate Authority + Implementation Intentions | Over-constraining creative steps |
92
+ | Reference (docs, guides) | Structural Isolation + Positional Privilege | All persuasion — clarity only |
93
+
94
+ ## The 3-Zone Architecture
95
+
96
+ Apply these principles through the 3-zone skill layout:
97
+
98
+ - **Zone 1 (Primacy):** Identity + Iron Laws + Priority Stack — leverages positional privilege + authority + commitment
99
+ - **Zone 2 (Process):** IF-THEN rules + decision tables + gate functions — leverages implementation intentions + structural isolation
100
+ - **Zone 3 (Recency):** Restated laws + Red Flags + meta-instruction — leverages recency + redundant reinforcement + consequence framing
101
+
102
+ ## Temporal Testing Advisory
103
+
104
+ Prompt engineering techniques lose effectiveness as models improve. Re-test skill compliance with every major model version. Include a "last verified" date on persuasion-dependent skills.
105
+
106
+ **Last verified:** Claude Opus 4.6, March 2026
107
+
108
+ ## Sources
109
+
110
+ - Meincke et al. (2025). "Call Me A Jerk: Persuading AI to Comply" (N=28,000, SSRN)
111
+ - Liu et al. (2024). "Lost in the Middle" (TACL, arXiv:2307.03172)
112
+ - Wallace et al. (2024). "The Instruction Hierarchy" (OpenAI, arXiv:2404.13208)
113
+ - Gollwitzer (1999). Implementation Intentions; effect size from the Gollwitzer & Sheeran (2006) meta-analysis (d=0.65, 94 studies)
114
+ - EmotionPrompt (2023). Emotional framing effects (arXiv:2307.11760)
115
+ - Zhou et al. (2023). IFEval benchmark (arXiv:2311.07911)
116
+ - Anthropic (2024). Claude Model Spec — instruction hierarchy documentation
@@ -19,6 +19,7 @@ import {
19
19
  } from './store.js';
20
20
  import { readRunConfig, getPhaseLoopCap } from './run-config.js';
21
21
  import { readUsage, generateReport, initUsage, recordCaptureSavings, recordPhaseUsage } from './usage.js';
22
+ import { appendDecision } from './decision.js';
22
23
  import { evaluateLoopCapGuard } from '../guards/loop-cap-guard.js';
23
24
  import { evaluatePhasePrerequisiteGuard } from '../guards/phase-prerequisite-guard.js';
24
25
 
@@ -73,6 +74,8 @@ function resolveCaptureContext(parsed, context = {}) {
73
74
  'command',
74
75
  'exit-code',
75
76
  'task-id',
77
+ 'decision',
78
+ 'reason',
76
79
  ],
77
80
  });
78
81
  const stateRoot = resolveStateRoot(projectRoot, manifest, {
@@ -388,6 +391,29 @@ function handleUsage(parsed, context = {}) {
388
391
  };
389
392
  }
390
393
 
394
+ function handleDecision(parsed, context = {}) {
395
+ const { stateRoot, options } = resolveCaptureContext(parsed, context);
396
+
397
+ requireOption(options, 'run', 'Usage: wazir capture decision --run <id> --phase <phase> --decision "<text>" --reason "<text>" [--task-id <id>] [--state-root <path>] [--json]');
398
+ requireOption(options, 'phase', 'Usage: wazir capture decision --run <id> --phase <phase> --decision "<text>" --reason "<text>" [--task-id <id>] [--state-root <path>] [--json]');
399
+ requireOption(options, 'decision', 'Usage: wazir capture decision --run <id> --phase <phase> --decision "<text>" --reason "<text>" [--task-id <id>] [--state-root <path>] [--json]');
400
+ requireOption(options, 'reason', 'Usage: wazir capture decision --run <id> --phase <phase> --decision "<text>" --reason "<text>" [--task-id <id>] [--state-root <path>] [--json]');
401
+
402
+ const runPaths = getRunPaths(stateRoot, options.run);
403
+ appendDecision(runPaths, {
404
+ phase: options.phase,
405
+ decision: options.decision,
406
+ reason: options.reason,
407
+ task_id: options.taskId,
408
+ });
409
+
410
+ return formatResult({
411
+ run_id: options.run,
412
+ event: 'decision',
413
+ decisions_path: runPaths.decisionsPath,
414
+ }, { json: options.json });
415
+ }
416
+
391
417
  function handleLoopCheck(parsed, context = {}) {
392
418
  const { stateRoot, options } = resolveCaptureContext(parsed, context);
393
419
 
@@ -486,10 +512,12 @@ export function runCaptureCommand(parsed, context = {}) {
486
512
  return handleUsage(parsed, context);
487
513
  case 'loop-check':
488
514
  return handleLoopCheck(parsed, context);
515
+ case 'decision':
516
+ return handleDecision(parsed, context);
489
517
  default:
490
518
  return {
491
519
  exitCode: 1,
492
- stderr: 'Usage: wazir capture <init|event|route|output|summary|usage|loop-check> ...\n',
520
+ stderr: 'Usage: wazir capture <init|event|route|output|summary|usage|loop-check|decision> ...\n',
493
521
  };
494
522
  }
495
523
  } catch (error) {
@@ -0,0 +1,40 @@
1
+ import fs from 'node:fs';
2
+
3
+ /**
4
+ * Append a decision entry to the run's NDJSON log.
5
+ *
6
+ * @param {object} runPaths - Run paths object (must include decisionsPath)
7
+ * @param {object} entry - { phase, decision, reason, task_id? }
8
+ */
9
+ export function appendDecision(runPaths, { phase, decision, reason, task_id }) {
10
+ const record = {
11
+ timestamp: new Date().toISOString(),
12
+ phase: phase ?? 'unknown',
13
+ decision: decision ?? '',
14
+ reason: reason ?? '',
15
+ };
16
+ if (task_id) {
17
+ record.task_id = task_id;
18
+ }
19
+ fs.appendFileSync(runPaths.decisionsPath, JSON.stringify(record) + '\n');
20
+ return runPaths.decisionsPath;
21
+ }
22
+
23
+ /**
24
+ * Read all entries from a run's decisions log.
25
+ *
26
+ * @param {object} runPaths - Run paths object (must include decisionsPath)
27
+ * @returns {Array<object>}
28
+ */
29
+ export function readDecisions(runPaths) {
30
+ if (!fs.existsSync(runPaths.decisionsPath)) return [];
31
+
32
+ return fs.readFileSync(runPaths.decisionsPath, 'utf8')
33
+ .split('\n')
34
+ .filter(line => line.trim())
35
+ .map(line => {
36
+ try { return JSON.parse(line); }
37
+ catch { return null; }
38
+ })
39
+ .filter(Boolean);
40
+ }
@@ -19,6 +19,7 @@ export function getRunPaths(stateRoot, runId) {
19
19
  capturesDir,
20
20
  statusPath: path.join(runRoot, 'status.json'),
21
21
  eventsPath: path.join(runRoot, 'events.ndjson'),
22
+ decisionsPath: path.join(runRoot, 'decisions.ndjson'),
22
23
  summaryPath: path.join(runRoot, 'summary.md'),
23
24
  usagePath: path.join(runRoot, 'usage.json'),
24
25
  };
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Canonical depth parameter table.
3
+ *
4
+ * Single source of truth for all depth-dependent behavior across the pipeline.
5
+ * Skills reference these values conceptually; hooks and tooling import directly.
6
+ */
7
+
8
+ export const DEPTH_LEVELS = new Set(['quick', 'standard', 'deep']);
9
+
10
+ export const DEPTH_TABLE = {
11
+ quick: {
12
+ review_passes: 3,
13
+ loop_cap: 5,
14
+ heartbeat_max_silence_s: 180,
15
+ research_intensity: 'minimal',
16
+ challenge_intensity: 'surface',
17
+ spec_hardening_passes: 1,
18
+ design_review_passes: 1,
19
+ time_estimate_label: '~15-30 min',
20
+ },
21
+ standard: {
22
+ review_passes: 5,
23
+ loop_cap: 10,
24
+ heartbeat_max_silence_s: 120,
25
+ research_intensity: 'balanced',
26
+ challenge_intensity: 'balanced',
27
+ spec_hardening_passes: 3,
28
+ design_review_passes: 3,
29
+ time_estimate_label: '~45-90 min',
30
+ },
31
+ deep: {
32
+ review_passes: 7,
33
+ loop_cap: 15,
34
+ heartbeat_max_silence_s: 90,
35
+ research_intensity: 'thorough',
36
+ challenge_intensity: 'adversarial',
37
+ spec_hardening_passes: 5,
38
+ design_review_passes: 5,
39
+ time_estimate_label: '~2-3 hrs',
40
+ },
41
+ };
42
+
43
+ /**
44
+ * Get a specific depth parameter value.
45
+ *
46
+ * @param {string} depth — 'quick' | 'standard' | 'deep' (defaults to 'standard')
47
+ * @param {string} param — parameter name from the depth table
48
+ * @returns {*} the parameter value
49
+ */
50
+ export function getDepthParam(depth, param) {
51
+ const level = depth ?? 'standard';
52
+ if (!DEPTH_LEVELS.has(level)) {
53
+ throw new Error(`Unknown depth level: "${level}". Valid levels: ${[...DEPTH_LEVELS].join(', ')}`);
54
+ }
55
+ const entry = DEPTH_TABLE[level];
56
+ if (!(param in entry)) {
57
+ throw new Error(`Unknown depth parameter: "${param}". Valid params: ${Object.keys(entry).join(', ')}`);
58
+ }
59
+ return entry[param];
60
+ }
@@ -91,8 +91,7 @@ function renderCommonInstructions(host, manifest) {
91
91
  const DEFAULT_CLAUDE_HOOKS = {
92
92
  hooks: {
93
93
  PreToolUse: [
94
- { matcher: 'Write|Edit', hooks: [{ type: 'command', command: './hooks/protected-path-write-guard' }] },
95
- { matcher: 'Bash', hooks: [{ type: 'command', command: './hooks/context-mode-router' }] },
94
+ { matcher: 'Write|Edit|Bash', hooks: [{ type: 'command', command: './hooks/pretooluse-dispatcher' }] },
96
95
  ],
97
96
  SessionStart: [
98
97
  { hooks: [{ type: 'command', command: './hooks/loop-cap-guard' }] },
@@ -115,21 +114,21 @@ function renderCursorHooks() {
115
114
  return JSON.stringify({
116
115
  hooks: [
117
116
  {
118
- name: 'protected-path-write-guard',
119
- command: './hooks/protected-path-write-guard',
117
+ name: 'pretooluse-dispatcher',
118
+ command: './hooks/pretooluse-dispatcher',
120
119
  },
121
120
  {
122
121
  name: 'loop-cap-guard',
123
122
  command: './hooks/loop-cap-guard',
124
123
  },
125
- {
126
- name: 'context-mode-router',
127
- command: './hooks/context-mode-router',
128
- },
129
124
  {
130
125
  name: 'session-start',
131
126
  command: './hooks/session-start',
132
127
  },
128
+ {
129
+ name: 'stop-pipeline-gate',
130
+ command: './hooks/stop-pipeline-gate',
131
+ },
133
132
  ],
134
133
  }, null, 2);
135
134
  }
@@ -0,0 +1,131 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+
4
+ // ---------------------------------------------------------------------------
5
+ // Helpers
6
+ // ---------------------------------------------------------------------------
7
+
8
+ function fileExistsAndNonEmpty(filePath) {
9
+ if (!fs.existsSync(filePath)) return false;
10
+ const stat = fs.statSync(filePath);
11
+ return stat.size > 0;
12
+ }
13
+
14
+ function result(passed, reason, missing = []) {
15
+ return { passed, reason, ...(missing.length > 0 ? { missing } : {}) };
16
+ }
17
+
18
+ // ---------------------------------------------------------------------------
19
+ // Per-phase validators
20
+ // ---------------------------------------------------------------------------
21
+
22
+ const CLARIFY_ARTIFACTS = [
23
+ 'clarified/clarification.md',
24
+ 'clarified/spec-hardened.md',
25
+ 'clarified/design.md',
26
+ 'clarified/execution-plan.md',
27
+ ];
28
+
29
+ /**
30
+ * Validates clarify phase produced all required artifacts.
31
+ */
32
+ export function validateClarifyComplete(_state, runDir) {
33
+ const missing = [];
34
+ for (const relPath of CLARIFY_ARTIFACTS) {
35
+ const full = path.join(runDir, relPath);
36
+ if (!fileExistsAndNonEmpty(full)) {
37
+ missing.push(relPath);
38
+ }
39
+ }
40
+ if (missing.length > 0) {
41
+ return result(false, `Missing clarify artifacts: ${missing.join(', ')}`, missing);
42
+ }
43
+ return result(true, 'All clarify artifacts present and non-empty.');
44
+ }
45
+
46
+ /**
47
+ * Validates execute phase: at least one task artifact dir and verification proof.
48
+ */
49
+ export function validateExecuteComplete(_state, runDir) {
50
+ const missing = [];
51
+ const artifactsDir = path.join(runDir, 'artifacts');
52
+
53
+ // Check for at least one task-NNN directory (directory contents are not inspected)
54
+ const taskDirs = fs.existsSync(artifactsDir)
55
+ ? fs.readdirSync(artifactsDir).filter(d => d.startsWith('task-') && fs.statSync(path.join(artifactsDir, d)).isDirectory())
56
+ : [];
57
+
58
+ if (taskDirs.length === 0) {
59
+ missing.push('artifacts/task-NNN/ (no task artifacts found)');
60
+ }
61
+
62
+ // Check verification proof
63
+ const proofPath = path.join(artifactsDir, 'verification-proof.md');
64
+ if (!fileExistsAndNonEmpty(proofPath)) {
65
+ missing.push('artifacts/verification-proof.md');
66
+ }
67
+
68
+ if (missing.length > 0) {
69
+ return result(false, `Missing execute artifacts: ${missing.join(', ')}`, missing);
70
+ }
71
+ return result(true, `Execute complete: ${taskDirs.length} task(s) + verification proof.`);
72
+ }
73
+
74
+ /**
75
+ * Validates verify phase: proof exists and has substantive content.
76
+ */
77
+ export function validateVerifyComplete(_state, runDir) {
78
+ const proofPath = path.join(runDir, 'artifacts', 'verification-proof.md');
79
+ if (!fileExistsAndNonEmpty(proofPath)) {
80
+ return result(false, 'Verification proof missing or empty.', ['artifacts/verification-proof.md']);
81
+ }
82
+
83
+ const content = fs.readFileSync(proofPath, 'utf8');
84
+ if (content.trim().length < 20) {
85
+ return result(false, 'Verification proof exists but has insufficient content.', ['artifacts/verification-proof.md']);
86
+ }
87
+
88
+ return result(true, 'Verification proof present with evidence.');
89
+ }
90
+
91
+ /**
92
+ * Validates review phase: verdict.json with a numeric score.
93
+ */
94
+ export function validateReviewComplete(_state, runDir) {
95
+ const verdictPath = path.join(runDir, 'reviews', 'verdict.json');
96
+ if (!fs.existsSync(verdictPath)) {
97
+ return result(false, 'Review verdict missing.', ['reviews/verdict.json']);
98
+ }
99
+
100
+ try {
101
+ const verdict = JSON.parse(fs.readFileSync(verdictPath, 'utf8'));
102
+ if (typeof verdict.score !== 'number') {
103
+ return result(false, 'Review verdict has no numeric score.', ['reviews/verdict.json (missing score)']);
104
+ }
105
+ return result(true, `Review complete with score ${verdict.score}.`);
106
+ } catch {
107
+ return result(false, 'Review verdict is not valid JSON.', ['reviews/verdict.json']);
108
+ }
109
+ }
110
+
111
+ // ---------------------------------------------------------------------------
112
+ // Dispatcher
113
+ // ---------------------------------------------------------------------------
114
+
115
+ const VALIDATORS = {
116
+ clarify: validateClarifyComplete,
117
+ execute: validateExecuteComplete,
118
+ verify: validateVerifyComplete,
119
+ review: validateReviewComplete,
120
+ };
121
+
122
+ /**
123
+ * Run the guardrail for a given phase.
124
+ */
125
+ export function runGuardrail(phase, state, runDir) {
126
+ const validator = VALIDATORS[phase];
127
+ if (!validator) {
128
+ throw new Error(`Unknown phase for guardrail: ${phase}`);
129
+ }
130
+ return validator(state, runDir);
131
+ }
@@ -143,10 +143,42 @@ export function evaluatePhasePrerequisiteGuard(payload) {
143
143
  const requiredPhaseExits = prerequisites.required_phase_exits ?? [];
144
144
 
145
145
  const missingArtifacts = [];
146
+ const failedProofs = [];
146
147
  for (const artifact of requiredArtifacts) {
147
148
  const artifactPath = path.join(runPaths.runRoot, artifact);
148
149
  if (!fs.existsSync(artifactPath)) {
149
150
  missingArtifacts.push(artifact);
151
+ continue;
152
+ }
153
+
154
+ const basename = path.basename(artifact);
155
+
156
+ // Content validation for proof JSON files (e.g. proof-task-001.json, verification-proof.json)
157
+ if (basename.includes('proof') && basename.endsWith('.json')) {
158
+ try {
159
+ const content = fs.readFileSync(artifactPath, 'utf8');
160
+ const parsed = JSON.parse(content);
161
+ if (parsed.all_passed !== true) {
162
+ failedProofs.push(`${artifact}: all_passed is not true (got ${JSON.stringify(parsed.all_passed)})`);
163
+ }
164
+ } catch {
165
+ // Fail closed: malformed JSON blocks the phase
166
+ failedProofs.push(`${artifact}: malformed or unreadable JSON`);
167
+ }
168
+ continue;
169
+ }
170
+
171
+ // Content validation for verification-proof.md
172
+ if (basename === 'verification-proof.md') {
173
+ try {
174
+ const content = fs.readFileSync(artifactPath, 'utf8');
175
+ const lower = content.toLowerCase();
176
+ if (!lower.includes('status: pass') && !content.includes('PASS')) {
177
+ failedProofs.push(`${artifact}: does not contain "status: pass" or "PASS"`);
178
+ }
179
+ } catch {
180
+ failedProofs.push(`${artifact}: unreadable`);
181
+ }
150
182
  }
151
183
  }
152
184
 
@@ -158,10 +190,10 @@ export function evaluatePhasePrerequisiteGuard(payload) {
158
190
  }
159
191
  }
160
192
 
161
- // OR-logic for resumed runs: if all artifacts exist, pass even without phase_exit events.
193
+ // OR-logic for resumed runs: if all artifacts exist and proofs pass, allow even without phase_exit events.
162
194
  // Artifacts are the hard evidence; phase_exits are supplementary.
163
- // But if artifacts are missing, phase_exits alone are not sufficient.
164
- if (missingArtifacts.length === 0) {
195
+ // But if artifacts are missing or proofs fail, phase_exits alone are not sufficient.
196
+ if (missingArtifacts.length === 0 && failedProofs.length === 0) {
165
197
  return {
166
198
  allowed: true,
167
199
  reason: `All prerequisite artifacts present for phase ${phase}.`,
@@ -172,6 +204,9 @@ export function evaluatePhasePrerequisiteGuard(payload) {
172
204
  if (missingArtifacts.length > 0) {
173
205
  reasons.push(`Missing artifacts: ${missingArtifacts.join(', ')}`);
174
206
  }
207
+ if (failedProofs.length > 0) {
208
+ reasons.push(`Failed proof validation: ${failedProofs.join('; ')}`);
209
+ }
175
210
  if (missingPhaseExits.length > 0) {
176
211
  reasons.push(`Missing phase exits: ${missingPhaseExits.join(', ')}`);
177
212
  }
@@ -180,6 +215,7 @@ export function evaluatePhasePrerequisiteGuard(payload) {
180
215
  allowed: false,
181
216
  reason: reasons.join('. '),
182
217
  missing_artifacts: missingArtifacts.length > 0 ? missingArtifacts : undefined,
218
+ failed_proofs: failedProofs.length > 0 ? failedProofs : undefined,
183
219
  missing_phase_exits: missingPhaseExits.length > 0 ? missingPhaseExits : undefined,
184
220
  };
185
221
  }