@wazir-dev/cli 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -2
- package/docs/research/2026-03-20-agents/a18fb002157904af5.txt +187 -0
- package/docs/research/2026-03-20-agents/a1d0ac79ac2f11e6f.txt +2 -0
- package/docs/research/2026-03-20-agents/a324079de037abd7c.txt +198 -0
- package/docs/research/2026-03-20-agents/a357586bccfafb0e5.txt +256 -0
- package/docs/research/2026-03-20-agents/a4365394e4d753105.txt +137 -0
- package/docs/research/2026-03-20-agents/a492af28bc52d3613.txt +136 -0
- package/docs/research/2026-03-20-agents/a4984db0b6a8eee07.txt +124 -0
- package/docs/research/2026-03-20-agents/a5b30e59d34bbb062.txt +214 -0
- package/docs/research/2026-03-20-agents/a5cf7829dab911586.txt +165 -0
- package/docs/research/2026-03-20-agents/a607157c30dd97c9e.txt +96 -0
- package/docs/research/2026-03-20-agents/a60b68b1e19d1e16b.txt +115 -0
- package/docs/research/2026-03-20-agents/a722af01c5594aba0.txt +166 -0
- package/docs/research/2026-03-20-agents/a787bdc516faa5829.txt +181 -0
- package/docs/research/2026-03-20-agents/a7c46d1bba1056ed2.txt +132 -0
- package/docs/research/2026-03-20-agents/a7e5abbab2b281a0d.txt +100 -0
- package/docs/research/2026-03-20-agents/a8dbadc66cd0d7d5a.txt +95 -0
- package/docs/research/2026-03-20-agents/a904d9f45d6b86a6d.txt +75 -0
- package/docs/research/2026-03-20-agents/a927659a942ee7f60.txt +102 -0
- package/docs/research/2026-03-20-agents/a962cb569191f7583.txt +125 -0
- package/docs/research/2026-03-20-agents/aab6decea538aac41.txt +148 -0
- package/docs/research/2026-03-20-agents/abd58b853dd938a1b.txt +295 -0
- package/docs/research/2026-03-20-agents/ac009da573eff7f65.txt +100 -0
- package/docs/research/2026-03-20-agents/ac1bc783364405e5f.txt +190 -0
- package/docs/research/2026-03-20-agents/aca5e2b57fde152a0.txt +132 -0
- package/docs/research/2026-03-20-agents/ad849b8c0a7e95b8b.txt +176 -0
- package/docs/research/2026-03-20-agents/adc2b12a4da32c962.txt +258 -0
- package/docs/research/2026-03-20-agents/af97caaaa9a80e4cb.txt +146 -0
- package/docs/research/2026-03-20-agents/afc5faceee368b3ca.txt +111 -0
- package/docs/research/2026-03-20-agents/afdb282d866e3c1e4.txt +164 -0
- package/docs/research/2026-03-20-agents/afe9d1f61c02b1e8d.txt +299 -0
- package/docs/research/2026-03-20-agents/b4hmkwril.txt +1856 -0
- package/docs/research/2026-03-20-agents/b80ptk89g.txt +1856 -0
- package/docs/research/2026-03-20-agents/bf54s1jss.txt +1150 -0
- package/docs/research/2026-03-20-agents/bhd6kq2kx.txt +1856 -0
- package/docs/research/2026-03-20-agents/bmb2fodyr.txt +988 -0
- package/docs/research/2026-03-20-agents/bmmsrij8i.txt +826 -0
- package/docs/research/2026-03-20-agents/bn4t2ywpu.txt +2175 -0
- package/docs/research/2026-03-20-agents/bu22t9f1z.txt +0 -0
- package/docs/research/2026-03-20-agents/bwvl98v2p.txt +738 -0
- package/docs/research/2026-03-20-agents/psych-a3697a7fd06eb64fd.txt +135 -0
- package/docs/research/2026-03-20-agents/psych-a37776fabc870feae.txt +123 -0
- package/docs/research/2026-03-20-agents/psych-a5b1fe05c0589efaf.txt +2 -0
- package/docs/research/2026-03-20-agents/psych-a95c15b1f29424435.txt +76 -0
- package/docs/research/2026-03-20-agents/psych-a9c26f4d9172dde7c.txt +2 -0
- package/docs/research/2026-03-20-agents/psych-aa19c69f0ca2c5ad3.txt +2 -0
- package/docs/research/2026-03-20-agents/psych-aa4e4cb70e1be5ecb.txt +95 -0
- package/docs/research/2026-03-20-agents/psych-ab5b302f26a554663.txt +102 -0
- package/docs/research/2026-03-20-deep-research-complete.md +101 -0
- package/docs/research/2026-03-20-deep-research-status.md +38 -0
- package/docs/research/2026-03-20-enforcement-research.md +107 -0
- package/expertise/composition-map.yaml +27 -8
- package/expertise/digests/reviewer/ai-coding-digest.md +83 -0
- package/expertise/digests/reviewer/architectural-thinking-digest.md +63 -0
- package/expertise/digests/reviewer/architecture-antipatterns-digest.md +49 -0
- package/expertise/digests/reviewer/code-smells-digest.md +53 -0
- package/expertise/digests/reviewer/coupling-cohesion-digest.md +54 -0
- package/expertise/digests/reviewer/ddd-digest.md +60 -0
- package/expertise/digests/reviewer/dependency-risk-digest.md +40 -0
- package/expertise/digests/reviewer/error-handling-digest.md +55 -0
- package/expertise/digests/reviewer/review-methodology-digest.md +49 -0
- package/exports/hosts/claude/.claude/commands/learn.md +61 -8
- package/exports/hosts/claude/.claude/settings.json +7 -6
- package/exports/hosts/claude/export.manifest.json +6 -3
- package/exports/hosts/claude/host-package.json +3 -0
- package/exports/hosts/codex/export.manifest.json +6 -3
- package/exports/hosts/codex/host-package.json +3 -0
- package/exports/hosts/cursor/.cursor/hooks.json +6 -6
- package/exports/hosts/cursor/export.manifest.json +6 -3
- package/exports/hosts/cursor/host-package.json +3 -0
- package/exports/hosts/gemini/export.manifest.json +6 -3
- package/exports/hosts/gemini/host-package.json +3 -0
- package/hooks/definitions/pretooluse_dispatcher.yaml +26 -0
- package/hooks/definitions/pretooluse_pipeline_guard.yaml +22 -0
- package/hooks/definitions/stop_pipeline_gate.yaml +22 -0
- package/hooks/hooks.json +7 -6
- package/hooks/pretooluse-dispatcher +84 -0
- package/hooks/pretooluse-pipeline-guard +9 -0
- package/hooks/stop-pipeline-gate +9 -0
- package/package.json +2 -2
- package/schemas/decision.schema.json +15 -0
- package/schemas/hook.schema.json +4 -1
- package/skills/TEMPLATE-3-ZONE.md +160 -0
- package/skills/brainstorming/SKILL.md +127 -23
- package/skills/clarifier/SKILL.md +175 -18
- package/skills/claude-cli/SKILL.md +91 -12
- package/skills/codex-cli/SKILL.md +91 -12
- package/skills/debugging/SKILL.md +133 -38
- package/skills/design/SKILL.md +173 -37
- package/skills/dispatching-parallel-agents/SKILL.md +129 -31
- package/skills/executing-plans/SKILL.md +113 -25
- package/skills/executor/SKILL.md +185 -21
- package/skills/finishing-a-development-branch/SKILL.md +107 -18
- package/skills/gemini-cli/SKILL.md +91 -12
- package/skills/humanize/SKILL.md +92 -13
- package/skills/init-pipeline/SKILL.md +90 -17
- package/skills/prepare-next/SKILL.md +93 -24
- package/skills/receiving-code-review/SKILL.md +90 -16
- package/skills/requesting-code-review/SKILL.md +100 -24
- package/skills/requesting-code-review/code-reviewer.md +29 -17
- package/skills/reviewer/SKILL.md +190 -50
- package/skills/run-audit/SKILL.md +92 -15
- package/skills/scan-project/SKILL.md +93 -14
- package/skills/self-audit/SKILL.md +113 -39
- package/skills/skill-research/SKILL.md +94 -7
- package/skills/subagent-driven-development/SKILL.md +129 -30
- package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +30 -2
- package/skills/subagent-driven-development/implementer-prompt.md +40 -27
- package/skills/subagent-driven-development/spec-reviewer-prompt.md +25 -12
- package/skills/tdd/SKILL.md +125 -20
- package/skills/using-git-worktrees/SKILL.md +118 -28
- package/skills/using-skills/SKILL.md +116 -29
- package/skills/verification/SKILL.md +127 -22
- package/skills/wazir/SKILL.md +517 -153
- package/skills/writing-plans/SKILL.md +134 -28
- package/skills/writing-skills/SKILL.md +91 -13
- package/skills/writing-skills/anthropic-best-practices.md +104 -64
- package/skills/writing-skills/persuasion-principles.md +100 -34
- package/tooling/src/capture/command.js +29 -1
- package/tooling/src/capture/decision.js +40 -0
- package/tooling/src/capture/store.js +1 -0
- package/tooling/src/config/depth-table.js +60 -0
- package/tooling/src/export/compiler.js +7 -8
- package/tooling/src/guards/guardrail-functions.js +131 -0
- package/tooling/src/guards/phase-prerequisite-guard.js +39 -3
- package/tooling/src/hooks/pretooluse-dispatcher.js +300 -0
- package/tooling/src/hooks/pretooluse-pipeline-guard.js +141 -0
- package/tooling/src/hooks/stop-pipeline-gate.js +92 -0
- package/tooling/src/learn/pipeline.js +177 -0
- package/tooling/src/state/db.js +251 -2
- package/tooling/src/state/pipeline-state.js +262 -0
- package/wazir.manifest.yaml +3 -0
- package/workflows/learn.md +61 -8
|
@@ -2,49 +2,115 @@
|
|
|
2
2
|
|
|
3
3
|
## Overview
|
|
4
4
|
|
|
5
|
-
LLMs
|
|
5
|
+
LLMs exhibit statistical compliance biases that can be leveraged to improve instruction following. This is not psychology applied to machines — it is empirical prompt engineering grounded in attention mechanics, training distribution effects, and measured compliance rates.
|
|
6
6
|
|
|
7
|
-
**Research foundation:** Meincke et al. (2025) tested 7 persuasion principles with N=28,000 AI conversations.
|
|
7
|
+
**Research foundation:** Meincke et al. (2025) tested 7 persuasion principles with N=28,000 AI conversations. Commitment priming approached 100% compliance. Positive directive framing consistently outperformed negative framing. Authority framing lifted compliance by ~40pp.
|
|
8
8
|
|
|
9
|
-
##
|
|
9
|
+
## Principles Ranked by Evidence Strength
|
|
10
10
|
|
|
11
|
-
### 1
|
|
12
|
-
- Imperative language: "YOU MUST", "Never", "Always"
|
|
13
|
-
- Non-negotiable framing: "No exceptions"
|
|
14
|
-
- Eliminates decision fatigue and rationalization
|
|
11
|
+
### Tier 1: Strong Evidence, Large Effect
|
|
15
12
|
|
|
16
|
-
|
|
17
|
-
-
|
|
18
|
-
-
|
|
19
|
-
-
|
|
13
|
+
**1. Commitment Priming (highest impact)**
|
|
14
|
+
- Have the model announce its plan before executing
|
|
15
|
+
- Autoregressive consistency: once the model generates "I will do X", it is statistically more likely to do X
|
|
16
|
+
- Implementation: "Before executing, state which steps you will perform"
|
|
17
|
+
- Measured: near-100% compliance after self-commitment in Meincke et al.
|
|
20
18
|
|
|
21
|
-
|
|
22
|
-
-
|
|
23
|
-
-
|
|
24
|
-
-
|
|
19
|
+
**2. Positive Directive Framing**
|
|
20
|
+
- "Always do X" consistently outperforms "Never do Y"
|
|
21
|
+
- Token generation selects what to produce, not what to avoid
|
|
22
|
+
- Negative instructions ("do NOT mention X") can paradoxically increase mentions
|
|
23
|
+
- Use negative framing ONLY for critical guardrails with a positive alternative: "Do NOT skip review. Instead, run review quickly."
|
|
25
24
|
|
|
26
|
-
|
|
27
|
-
-
|
|
28
|
-
-
|
|
29
|
-
-
|
|
25
|
+
**3. Structural Isolation (XML Tags)**
|
|
26
|
+
- Claude is fine-tuned to attend to XML tag boundaries
|
|
27
|
+
- Tags create attention-weight spikes and trust boundaries
|
|
28
|
+
- Use `<rules>`, `<instructions>`, `<output_format>` for hard boundaries
|
|
29
|
+
- Hybrid XML+markdown is optimal: XML for structure, markdown for formatting within sections
|
|
30
30
|
|
|
31
|
-
|
|
32
|
-
-
|
|
33
|
-
-
|
|
31
|
+
**4. Positional Privilege (Primacy + Recency)**
|
|
32
|
+
- First ~500 tokens: ~95% compliance (primacy zone)
|
|
33
|
+
- Last ~500 tokens: ~85% compliance (recency zone)
|
|
34
|
+
- Middle of long context: ~65-75% compliance (lost in the middle)
|
|
35
|
+
- Critical rules go at beginning AND end. Never only in the middle.
|
|
34
36
|
|
|
35
|
-
###
|
|
36
|
-
- Obligation to return favors
|
|
37
|
-
- "I'll give you full context, you give me honest assessment"
|
|
37
|
+
### Tier 2: Strong Evidence, Moderate Effect
|
|
38
38
|
|
|
39
|
-
|
|
40
|
-
-
|
|
41
|
-
-
|
|
39
|
+
**5. Authority / Role Assignment**
|
|
40
|
+
- "You are a senior security auditor responsible for..." activates domain-specific patterns
|
|
41
|
+
- +40pp lift in Meincke et al.
|
|
42
|
+
- Expert personas produce more accurate, more disciplined output
|
|
43
|
+
|
|
44
|
+
**6. Consequence Framing**
|
|
45
|
+
- "Skipping this step causes silent regressions that waste hours of debugging"
|
|
46
|
+
- Provides reasoning context for why compliance matters
|
|
47
|
+
- More effective than abstract rules ("always follow the process")
|
|
48
|
+
|
|
49
|
+
**7. Implementation Intentions (IF-THEN rules)**
|
|
50
|
+
- "IF user says skip → THEN say 'Running it quickly' and execute"
|
|
51
|
+
- Pre-decides the response — no judgment call needed at runtime
|
|
52
|
+
- d=0.65 across 94 independent tests in psychology (Gollwitzer & Sheeran, 2006 meta-analysis). Maps directly to LLM prompt design.
|
|
53
|
+
- Single most actionable technique for skill authors
|
|
54
|
+
|
|
55
|
+
**8. Redundant Reinforcement**
|
|
56
|
+
- State the rule, show an example, reference it in the output format, add a constraint tag
|
|
57
|
+
- Multiple encoding paths survive when any single one fails
|
|
58
|
+
- Paraphrased repetition (2-3x) outperforms verbatim repetition
|
|
59
|
+
|
|
60
|
+
### Tier 3: Context-Dependent Effect
|
|
61
|
+
|
|
62
|
+
**9. Social Proof**
|
|
63
|
+
- "Standard practice is..." or "All production systems follow this pattern"
|
|
64
|
+
- Effective when baseline compliance is already moderate (+6pp)
|
|
65
|
+
|
|
66
|
+
**10. Urgency / Scarcity**
|
|
67
|
+
- "This must be done correctly the first time; there is no retry"
|
|
68
|
+
- Increases both compliance and output variance — use sparingly
|
|
69
|
+
|
|
70
|
+
**11. Moral / Ethical Framing**
|
|
71
|
+
- "Omitting this would produce misleading output"
|
|
72
|
+
- Effective for Claude specifically due to Constitutional AI training
|
|
73
|
+
- Frame positively (good outcome of compliance) not negatively
|
|
74
|
+
|
|
75
|
+
## Anti-Patterns
|
|
76
|
+
|
|
77
|
+
| Pattern | Problem |
|
|
78
|
+
|---------|---------|
|
|
79
|
+
| Negative instructions without alternatives | "Don't do X" fails — model must activate X to evaluate constraint |
|
|
80
|
+
| Instruction overload (>12 constraints) | Steep compliance drop after ~12 accumulated constraints |
|
|
81
|
+
| Threats without specifics | "You will be punished" increases variance without improving median |
|
|
82
|
+
| Reciprocity framing | "I helped you, now help me" — among the weakest principles, only +11pp |
|
|
83
|
+
| Relying solely on alignment | 80% of enterprises reported injection incidents. Structural defenses needed. |
|
|
42
84
|
|
|
43
85
|
## Principle Combinations by Skill Type
|
|
44
86
|
|
|
45
|
-
| Skill Type |
|
|
46
|
-
|
|
47
|
-
| Discipline-enforcing |
|
|
48
|
-
|
|
|
49
|
-
| Collaborative |
|
|
50
|
-
| Reference |
|
|
87
|
+
| Skill Type | Primary Techniques | Avoid |
|
|
88
|
+
|------------|-------------------|-------|
|
|
89
|
+
| Discipline-enforcing (TDD, verification) | Commitment + Implementation Intentions + Positional Privilege + Authority | Liking, Reciprocity |
|
|
90
|
+
| Process-governing (clarifier, executor) | Commitment + Consequence Framing + Structural Isolation | Heavy emotional framing |
|
|
91
|
+
| Collaborative (brainstorming, design) | Moderate Authority + Implementation Intentions | Over-constraining creative steps |
|
|
92
|
+
| Reference (docs, guides) | Structural Isolation + Positional Privilege | All persuasion — clarity only |
|
|
93
|
+
|
|
94
|
+
## The 3-Zone Architecture
|
|
95
|
+
|
|
96
|
+
Apply these principles through the 3-zone skill layout:
|
|
97
|
+
|
|
98
|
+
- **Zone 1 (Primacy):** Identity + Iron Laws + Priority Stack — leverages positional privilege + authority + commitment
|
|
99
|
+
- **Zone 2 (Process):** IF-THEN rules + decision tables + gate functions — leverages implementation intentions + structural isolation
|
|
100
|
+
- **Zone 3 (Recency):** Restated laws + Red Flags + meta-instruction — leverages recency + redundant reinforcement + consequence framing
|
|
101
|
+
|
|
102
|
+
## Temporal Testing Advisory
|
|
103
|
+
|
|
104
|
+
Prompt engineering techniques lose effectiveness as models improve. Re-test skill compliance every major model version. Include a "last verified" date on persuasion-dependent skills.
|
|
105
|
+
|
|
106
|
+
**Last verified:** Claude Opus 4.6, March 2026
|
|
107
|
+
|
|
108
|
+
## Sources
|
|
109
|
+
|
|
110
|
+
- Meincke et al. (2025). "Call Me A Jerk: Persuading AI to Comply" (N=28,000, SSRN)
|
|
111
|
+
- Liu et al. (2024). "Lost in the Middle" (TACL, arXiv:2307.03172)
|
|
112
|
+
- Wallace et al. (2024). "The Instruction Hierarchy" (OpenAI, arXiv:2404.13208)
|
|
113
|
+
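- Gollwitzer (1999). "Implementation Intentions: Strong Effects of Simple Plans" (American Psychologist); effect size from Gollwitzer & Sheeran (2006) meta-analysis (d=0.65, 94 studies)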
|
|
114
|
+
- Li et al. (2023). EmotionPrompt: emotional framing effects (arXiv:2307.11760)
|
|
115
|
+
- Zhou et al. (2023). IFEval benchmark (arXiv:2311.07911)
|
|
116
|
+
- Anthropic (2024). Claude Model Spec — instruction hierarchy documentation
|
|
@@ -19,6 +19,7 @@ import {
|
|
|
19
19
|
} from './store.js';
|
|
20
20
|
import { readRunConfig, getPhaseLoopCap } from './run-config.js';
|
|
21
21
|
import { readUsage, generateReport, initUsage, recordCaptureSavings, recordPhaseUsage } from './usage.js';
|
|
22
|
+
import { appendDecision } from './decision.js';
|
|
22
23
|
import { evaluateLoopCapGuard } from '../guards/loop-cap-guard.js';
|
|
23
24
|
import { evaluatePhasePrerequisiteGuard } from '../guards/phase-prerequisite-guard.js';
|
|
24
25
|
|
|
@@ -73,6 +74,8 @@ function resolveCaptureContext(parsed, context = {}) {
|
|
|
73
74
|
'command',
|
|
74
75
|
'exit-code',
|
|
75
76
|
'task-id',
|
|
77
|
+
'decision',
|
|
78
|
+
'reason',
|
|
76
79
|
],
|
|
77
80
|
});
|
|
78
81
|
const stateRoot = resolveStateRoot(projectRoot, manifest, {
|
|
@@ -388,6 +391,29 @@ function handleUsage(parsed, context = {}) {
|
|
|
388
391
|
};
|
|
389
392
|
}
|
|
390
393
|
|
|
394
|
+
/**
 * Handle the `decision` capture subcommand.
 *
 * Resolves the capture context, checks that `run`, `phase`, `decision` and
 * `reason` are supplied via requireOption, appends one entry to the run's
 * decisions log through appendDecision, and returns the formatted result.
 *
 * @param {object} parsed - Parsed CLI input, forwarded to resolveCaptureContext.
 * @param {object} [context] - Invocation context, forwarded to resolveCaptureContext.
 * @returns {*} The value produced by formatResult for this event.
 */
function handleDecision(parsed, context = {}) {
  const usage = 'Usage: wazir capture decision --run <id> --phase <phase> --decision "<text>" --reason "<text>" [--task-id <id>] [--state-root <path>] [--json]';
  const { stateRoot, options } = resolveCaptureContext(parsed, context);

  // Checked one at a time, in the same order the usage string lists them.
  for (const requiredName of ['run', 'phase', 'decision', 'reason']) {
    requireOption(options, requiredName, usage);
  }

  const { run, phase, decision, reason, taskId, json } = options;
  const runPaths = getRunPaths(stateRoot, run);

  appendDecision(runPaths, { phase, decision, reason, task_id: taskId });

  const payload = {
    run_id: run,
    event: 'decision',
    decisions_path: runPaths.decisionsPath,
  };
  return formatResult(payload, { json });
}
|
|
416
|
+
|
|
391
417
|
function handleLoopCheck(parsed, context = {}) {
|
|
392
418
|
const { stateRoot, options } = resolveCaptureContext(parsed, context);
|
|
393
419
|
|
|
@@ -486,10 +512,12 @@ export function runCaptureCommand(parsed, context = {}) {
|
|
|
486
512
|
return handleUsage(parsed, context);
|
|
487
513
|
case 'loop-check':
|
|
488
514
|
return handleLoopCheck(parsed, context);
|
|
515
|
+
case 'decision':
|
|
516
|
+
return handleDecision(parsed, context);
|
|
489
517
|
default:
|
|
490
518
|
return {
|
|
491
519
|
exitCode: 1,
|
|
492
|
-
stderr: 'Usage: wazir capture <init|event|route|output|summary|usage|loop-check> ...\n',
|
|
520
|
+
stderr: 'Usage: wazir capture <init|event|route|output|summary|usage|loop-check|decision> ...\n',
|
|
493
521
|
};
|
|
494
522
|
}
|
|
495
523
|
} catch (error) {
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
|
|
3
|
+
/**
 * Write one decision record as a single JSON line at the end of the run's
 * decisions log (NDJSON: one JSON document per line).
 *
 * The record carries an ISO-8601 timestamp taken at call time. Missing
 * (null/undefined) fields fall back to 'unknown' for `phase` and to an empty
 * string for `decision` and `reason`.
 *
 * @param {object} runPaths - Run paths object; `decisionsPath` is the file appended to.
 * @param {object} entry - { phase, decision, reason, task_id? }
 * @returns {string} The path of the decisions log that was written.
 */
export function appendDecision(runPaths, { phase, decision, reason, task_id }) {
  const serialized = JSON.stringify({
    timestamp: new Date().toISOString(),
    phase: phase ?? 'unknown',
    decision: decision ?? '',
    reason: reason ?? '',
    // task_id is optional: a falsy value leaves the key out of the record.
    ...(task_id ? { task_id } : {}),
  });

  const { decisionsPath } = runPaths;
  fs.appendFileSync(decisionsPath, `${serialized}\n`);
  return decisionsPath;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Load every usable entry from a run's decisions log.
 *
 * The log is read as newline-separated JSON. Blank or whitespace-only lines
 * are skipped, lines that fail to parse are skipped, and parsed values that
 * are falsy (for example `null` or `0`) are skipped as well.
 *
 * @param {object} runPaths - Run paths object (must include decisionsPath)
 * @returns {Array<object>} Parsed entries in file order; empty when the log does not exist.
 */
export function readDecisions(runPaths) {
  const { decisionsPath } = runPaths;
  if (!fs.existsSync(decisionsPath)) {
    return [];
  }

  const entries = [];
  for (const rawLine of fs.readFileSync(decisionsPath, 'utf8').split('\n')) {
    if (rawLine.trim() === '') {
      continue;
    }

    let parsedLine;
    try {
      parsedLine = JSON.parse(rawLine);
    } catch {
      // Best-effort read: a corrupt line is dropped rather than failing the whole log.
      continue;
    }

    if (parsedLine) {
      entries.push(parsedLine);
    }
  }
  return entries;
}
|
|
@@ -19,6 +19,7 @@ export function getRunPaths(stateRoot, runId) {
|
|
|
19
19
|
capturesDir,
|
|
20
20
|
statusPath: path.join(runRoot, 'status.json'),
|
|
21
21
|
eventsPath: path.join(runRoot, 'events.ndjson'),
|
|
22
|
+
decisionsPath: path.join(runRoot, 'decisions.ndjson'),
|
|
22
23
|
summaryPath: path.join(runRoot, 'summary.md'),
|
|
23
24
|
usagePath: path.join(runRoot, 'usage.json'),
|
|
24
25
|
};
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Canonical depth parameter table.
|
|
3
|
+
*
|
|
4
|
+
* Single source of truth for all depth-dependent behavior across the pipeline.
|
|
5
|
+
* Skills reference these values conceptually; hooks and tooling import directly.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/** The set of depth level names accepted by getDepthParam. */
export const DEPTH_LEVELS = new Set(['quick', 'standard', 'deep']);

/** Per-depth parameter values, keyed by depth level name. */
export const DEPTH_TABLE = {
  quick: {
    review_passes: 3,
    loop_cap: 5,
    heartbeat_max_silence_s: 180,
    research_intensity: 'minimal',
    challenge_intensity: 'surface',
    spec_hardening_passes: 1,
    design_review_passes: 1,
    time_estimate_label: '~15-30 min',
  },
  standard: {
    review_passes: 5,
    loop_cap: 10,
    heartbeat_max_silence_s: 120,
    research_intensity: 'balanced',
    challenge_intensity: 'balanced',
    spec_hardening_passes: 3,
    design_review_passes: 3,
    time_estimate_label: '~45-90 min',
  },
  deep: {
    review_passes: 7,
    loop_cap: 15,
    heartbeat_max_silence_s: 90,
    research_intensity: 'thorough',
    challenge_intensity: 'adversarial',
    spec_hardening_passes: 5,
    design_review_passes: 5,
    time_estimate_label: '~2-3 hrs',
  },
};

/**
 * Get a specific depth parameter value.
 *
 * @param {string} depth — 'quick' | 'standard' | 'deep' (null/undefined defaults to 'standard')
 * @param {string} param — parameter name from the depth table
 * @returns {*} the parameter value
 * @throws {Error} when `depth` is not a known level or `param` is not a key of that level's entry
 */
export function getDepthParam(depth, param) {
  const level = depth ?? 'standard';
  if (!DEPTH_LEVELS.has(level)) {
    throw new Error(`Unknown depth level: "${level}". Valid levels: ${[...DEPTH_LEVELS].join(', ')}`);
  }
  const entry = DEPTH_TABLE[level];
  // Own-property check: the `in` operator also matches inherited names such as
  // "toString" or "constructor", which would be returned as if they were parameters.
  if (!Object.prototype.hasOwnProperty.call(entry, param)) {
    throw new Error(`Unknown depth parameter: "${param}". Valid params: ${Object.keys(entry).join(', ')}`);
  }
  return entry[param];
}
|
|
@@ -91,8 +91,7 @@ function renderCommonInstructions(host, manifest) {
|
|
|
91
91
|
const DEFAULT_CLAUDE_HOOKS = {
|
|
92
92
|
hooks: {
|
|
93
93
|
PreToolUse: [
|
|
94
|
-
{ matcher: 'Write|Edit', hooks: [{ type: 'command', command: './hooks/
|
|
95
|
-
{ matcher: 'Bash', hooks: [{ type: 'command', command: './hooks/context-mode-router' }] },
|
|
94
|
+
{ matcher: 'Write|Edit|Bash', hooks: [{ type: 'command', command: './hooks/pretooluse-dispatcher' }] },
|
|
96
95
|
],
|
|
97
96
|
SessionStart: [
|
|
98
97
|
{ hooks: [{ type: 'command', command: './hooks/loop-cap-guard' }] },
|
|
@@ -115,21 +114,21 @@ function renderCursorHooks() {
|
|
|
115
114
|
return JSON.stringify({
|
|
116
115
|
hooks: [
|
|
117
116
|
{
|
|
118
|
-
name: '
|
|
119
|
-
command: './hooks/
|
|
117
|
+
name: 'pretooluse-dispatcher',
|
|
118
|
+
command: './hooks/pretooluse-dispatcher',
|
|
120
119
|
},
|
|
121
120
|
{
|
|
122
121
|
name: 'loop-cap-guard',
|
|
123
122
|
command: './hooks/loop-cap-guard',
|
|
124
123
|
},
|
|
125
|
-
{
|
|
126
|
-
name: 'context-mode-router',
|
|
127
|
-
command: './hooks/context-mode-router',
|
|
128
|
-
},
|
|
129
124
|
{
|
|
130
125
|
name: 'session-start',
|
|
131
126
|
command: './hooks/session-start',
|
|
132
127
|
},
|
|
128
|
+
{
|
|
129
|
+
name: 'stop-pipeline-gate',
|
|
130
|
+
command: './hooks/stop-pipeline-gate',
|
|
131
|
+
},
|
|
133
132
|
],
|
|
134
133
|
}, null, 2);
|
|
135
134
|
}
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
|
|
4
|
+
// ---------------------------------------------------------------------------
|
|
5
|
+
// Helpers
|
|
6
|
+
// ---------------------------------------------------------------------------
|
|
7
|
+
|
|
8
|
+
/**
 * Report whether `filePath` is a regular file with at least one byte.
 *
 * @param {string} filePath - Path to check.
 * @returns {boolean} true only for an existing regular file whose size is > 0;
 *   false for a missing path, a directory, an empty file, or a path that
 *   cannot be stat'ed.
 */
function fileExistsAndNonEmpty(filePath) {
  let stat;
  try {
    // A single stat call: no window between an existence check and the stat
    // in which the file could disappear and make this function throw.
    stat = fs.statSync(filePath);
  } catch {
    return false;
  }
  // Directories commonly report a non-zero size, so size alone is not enough.
  return stat.isFile() && stat.size > 0;
}
|
|
13
|
+
|
|
14
|
+
/**
 * Build a guardrail outcome object.
 *
 * @param {boolean} passed - Whether the check succeeded.
 * @param {string} reason - Human-readable explanation.
 * @param {string[]} [missing] - Missing items; the `missing` key is only
 *   present on the outcome when this list is non-empty.
 * @returns {{passed: boolean, reason: string, missing?: string[]}}
 */
function result(passed, reason, missing = []) {
  const outcome = { passed, reason };
  if (missing.length > 0) {
    outcome.missing = missing;
  }
  return outcome;
}
|
|
17
|
+
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
// Per-phase validators
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
// Paths, relative to the run directory, that the clarify phase must have produced.
const CLARIFY_ARTIFACTS = [
  'clarified/clarification.md',
  'clarified/spec-hardened.md',
  'clarified/design.md',
  'clarified/execution-plan.md',
];

/**
 * Check that every clarify artifact exists under `runDir` and is non-empty.
 *
 * @param {*} _state - Unused.
 * @param {string} runDir - Run directory the artifact paths are resolved against.
 * @returns {{passed: boolean, reason: string, missing?: string[]}} Failing
 *   outcomes list the relative paths that were absent or empty.
 */
export function validateClarifyComplete(_state, runDir) {
  const missing = CLARIFY_ARTIFACTS.filter(
    (relPath) => !fileExistsAndNonEmpty(path.join(runDir, relPath)),
  );

  if (missing.length === 0) {
    return result(true, 'All clarify artifacts present and non-empty.');
  }
  return result(false, `Missing clarify artifacts: ${missing.join(', ')}`, missing);
}
|
|
45
|
+
|
|
46
|
+
/**
 * Check the execute phase output: `<runDir>/artifacts` must contain at least
 * one directory whose name starts with "task-", plus a non-empty
 * `verification-proof.md`.
 *
 * Only the existence of a task directory is checked here; its contents are
 * not inspected.
 *
 * @param {*} _state - Unused.
 * @param {string} runDir - Run directory containing the `artifacts` folder.
 * @returns {{passed: boolean, reason: string, missing?: string[]}}
 */
export function validateExecuteComplete(_state, runDir) {
  const artifactsDir = path.join(runDir, 'artifacts');

  let taskDirs = [];
  if (fs.existsSync(artifactsDir)) {
    taskDirs = fs.readdirSync(artifactsDir).filter((name) => {
      if (!name.startsWith('task-')) {
        return false;
      }
      return fs.statSync(path.join(artifactsDir, name)).isDirectory();
    });
  }

  const missing = [];
  if (taskDirs.length === 0) {
    missing.push('artifacts/task-NNN/ (no task artifacts found)');
  }
  if (!fileExistsAndNonEmpty(path.join(artifactsDir, 'verification-proof.md'))) {
    missing.push('artifacts/verification-proof.md');
  }

  if (missing.length === 0) {
    return result(true, `Execute complete: ${taskDirs.length} task(s) + verification proof.`);
  }
  return result(false, `Missing execute artifacts: ${missing.join(', ')}`, missing);
}
|
|
73
|
+
|
|
74
|
+
/**
 * Check the verify phase output: `artifacts/verification-proof.md` must exist,
 * be non-empty, and hold at least 20 characters once surrounding whitespace
 * is trimmed.
 *
 * @param {*} _state - Unused.
 * @param {string} runDir - Run directory containing the `artifacts` folder.
 * @returns {{passed: boolean, reason: string, missing?: string[]}}
 */
export function validateVerifyComplete(_state, runDir) {
  const proofPath = path.join(runDir, 'artifacts', 'verification-proof.md');
  // Both failure modes report the same missing entry.
  const fail = (reason) => result(false, reason, ['artifacts/verification-proof.md']);

  if (!fileExistsAndNonEmpty(proofPath)) {
    return fail('Verification proof missing or empty.');
  }

  const trimmedLength = fs.readFileSync(proofPath, 'utf8').trim().length;
  if (trimmedLength < 20) {
    return fail('Verification proof exists but has insufficient content.');
  }

  return result(true, 'Verification proof present with evidence.');
}
|
|
90
|
+
|
|
91
|
+
/**
 * Check the review phase output: `reviews/verdict.json` must exist, parse as
 * JSON, and carry a `score` of type number.
 *
 * @param {*} _state - Unused.
 * @param {string} runDir - Run directory containing the `reviews` folder.
 * @returns {{passed: boolean, reason: string, missing?: string[]}}
 */
export function validateReviewComplete(_state, runDir) {
  const verdictPath = path.join(runDir, 'reviews', 'verdict.json');
  if (!fs.existsSync(verdictPath)) {
    return result(false, 'Review verdict missing.', ['reviews/verdict.json']);
  }

  let score;
  try {
    // Reading `score` stays inside the try: a file that cannot be read, does
    // not parse, or parses to null is reported as invalid JSON.
    ({ score } = JSON.parse(fs.readFileSync(verdictPath, 'utf8')));
  } catch {
    return result(false, 'Review verdict is not valid JSON.', ['reviews/verdict.json']);
  }

  if (typeof score !== 'number') {
    return result(false, 'Review verdict has no numeric score.', ['reviews/verdict.json (missing score)']);
  }
  return result(true, `Review complete with score ${score}.`);
}
|
|
110
|
+
|
|
111
|
+
// ---------------------------------------------------------------------------
|
|
112
|
+
// Dispatcher
|
|
113
|
+
// ---------------------------------------------------------------------------
|
|
114
|
+
|
|
115
|
+
// Phase name -> validator. A Map is used so that only the phases listed here
// resolve; a plain object would also answer for inherited keys such as
// "constructor" or "toString".
const VALIDATORS = new Map([
  ['clarify', validateClarifyComplete],
  ['execute', validateExecuteComplete],
  ['verify', validateVerifyComplete],
  ['review', validateReviewComplete],
]);

/**
 * Run the guardrail for a given phase.
 *
 * @param {string} phase - One of 'clarify', 'execute', 'verify', 'review'.
 * @param {*} state - Passed through to the phase validator.
 * @param {string} runDir - Run directory passed through to the phase validator.
 * @returns {{passed: boolean, reason: string, missing?: string[]}} The validator's outcome.
 * @throws {Error} when no validator is registered for `phase`.
 */
export function runGuardrail(phase, state, runDir) {
  const validator = VALIDATORS.get(phase);
  if (!validator) {
    throw new Error(`Unknown phase for guardrail: ${phase}`);
  }
  return validator(state, runDir);
}
|
|
@@ -143,10 +143,42 @@ export function evaluatePhasePrerequisiteGuard(payload) {
|
|
|
143
143
|
const requiredPhaseExits = prerequisites.required_phase_exits ?? [];
|
|
144
144
|
|
|
145
145
|
const missingArtifacts = [];
|
|
146
|
+
const failedProofs = [];
|
|
146
147
|
for (const artifact of requiredArtifacts) {
|
|
147
148
|
const artifactPath = path.join(runPaths.runRoot, artifact);
|
|
148
149
|
if (!fs.existsSync(artifactPath)) {
|
|
149
150
|
missingArtifacts.push(artifact);
|
|
151
|
+
continue;
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
const basename = path.basename(artifact);
|
|
155
|
+
|
|
156
|
+
// Content validation for proof JSON files (e.g. proof-task-001.json, verification-proof.json)
|
|
157
|
+
if (basename.includes('proof') && basename.endsWith('.json')) {
|
|
158
|
+
try {
|
|
159
|
+
const content = fs.readFileSync(artifactPath, 'utf8');
|
|
160
|
+
const parsed = JSON.parse(content);
|
|
161
|
+
if (parsed.all_passed !== true) {
|
|
162
|
+
failedProofs.push(`${artifact}: all_passed is not true (got ${JSON.stringify(parsed.all_passed)})`);
|
|
163
|
+
}
|
|
164
|
+
} catch {
|
|
165
|
+
// Fail closed: malformed JSON blocks the phase
|
|
166
|
+
failedProofs.push(`${artifact}: malformed or unreadable JSON`);
|
|
167
|
+
}
|
|
168
|
+
continue;
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
// Content validation for verification-proof.md
|
|
172
|
+
if (basename === 'verification-proof.md') {
|
|
173
|
+
try {
|
|
174
|
+
const content = fs.readFileSync(artifactPath, 'utf8');
|
|
175
|
+
const lower = content.toLowerCase();
|
|
176
|
+
if (!lower.includes('status: pass') && !content.includes('PASS')) {
|
|
177
|
+
failedProofs.push(`${artifact}: does not contain "status: pass" or "PASS"`);
|
|
178
|
+
}
|
|
179
|
+
} catch {
|
|
180
|
+
failedProofs.push(`${artifact}: unreadable`);
|
|
181
|
+
}
|
|
150
182
|
}
|
|
151
183
|
}
|
|
152
184
|
|
|
@@ -158,10 +190,10 @@ export function evaluatePhasePrerequisiteGuard(payload) {
|
|
|
158
190
|
}
|
|
159
191
|
}
|
|
160
192
|
|
|
161
|
-
// OR-logic for resumed runs: if all artifacts exist
|
|
193
|
+
// OR-logic for resumed runs: if all artifacts exist and proofs pass, allow even without phase_exit events.
|
|
162
194
|
// Artifacts are the hard evidence; phase_exits are supplementary.
|
|
163
|
-
// But if artifacts are missing, phase_exits alone are not sufficient.
|
|
164
|
-
if (missingArtifacts.length === 0) {
|
|
195
|
+
// But if artifacts are missing or proofs fail, phase_exits alone are not sufficient.
|
|
196
|
+
if (missingArtifacts.length === 0 && failedProofs.length === 0) {
|
|
165
197
|
return {
|
|
166
198
|
allowed: true,
|
|
167
199
|
reason: `All prerequisite artifacts present for phase ${phase}.`,
|
|
@@ -172,6 +204,9 @@ export function evaluatePhasePrerequisiteGuard(payload) {
|
|
|
172
204
|
if (missingArtifacts.length > 0) {
|
|
173
205
|
reasons.push(`Missing artifacts: ${missingArtifacts.join(', ')}`);
|
|
174
206
|
}
|
|
207
|
+
if (failedProofs.length > 0) {
|
|
208
|
+
reasons.push(`Failed proof validation: ${failedProofs.join('; ')}`);
|
|
209
|
+
}
|
|
175
210
|
if (missingPhaseExits.length > 0) {
|
|
176
211
|
reasons.push(`Missing phase exits: ${missingPhaseExits.join(', ')}`);
|
|
177
212
|
}
|
|
@@ -180,6 +215,7 @@ export function evaluatePhasePrerequisiteGuard(payload) {
|
|
|
180
215
|
allowed: false,
|
|
181
216
|
reason: reasons.join('. '),
|
|
182
217
|
missing_artifacts: missingArtifacts.length > 0 ? missingArtifacts : undefined,
|
|
218
|
+
failed_proofs: failedProofs.length > 0 ? failedProofs : undefined,
|
|
183
219
|
missing_phase_exits: missingPhaseExits.length > 0 ? missingPhaseExits : undefined,
|
|
184
220
|
};
|
|
185
221
|
}
|