azclaude-copilot 0.4.39 → 0.4.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,7 +9,7 @@
9
9
  {
10
10
  "name": "azclaude",
11
11
  "description": "AZCLAUDE is a complete AI coding environment for Claude Code. It installs 37 commands, 10 auto-invoked skills, 15 specialized agents, 4 hooks, and a persistent memory system — in one command.\n\nKey features:\n• Memory across sessions — goals.md + checkpoints injected automatically before every session\n• Self-improving loop — /reflect fixes stale CLAUDE.md rules, /reflexes learns from tool-use patterns, /evolve creates agents from git evidence\n• Autonomous copilot mode — /copilot runs a three-tier team (orchestrator → problem-architect → milestone-builder) across sessions until the product ships\n• Spec-driven workflow — /constitute writes project rules, /spec writes structured ACs, /analyze detects plan drift and ghost milestones, /blueprint traces every milestone to a spec\n• Security layer — 111-rule environment scan (/sentinel), pre-write secret blocking, pre-ship credential audit\n• Progressive levels 0–10 — start with CLAUDE.md, grow into multi-agent pipelines and self-evolving environments\n• Zero dependencies — no npm packages, no external APIs, no vector databases. Plain markdown files and Claude Code's native architecture.\n• Smart install — npx azclaude-copilot@latest auto-detects first install vs upgrade vs verify. Context-aware onboarding shows the right next command for your project state.\n\nExample use cases:\n• /setup — scan an existing project, detect stack + domain + scale, fill CLAUDE.md, generate project-specific skills and agents automatically\n• /copilot \"Build a compliance SaaS with trilingual support\" — walk away, come back to working code across multiple sessions\n• /sentinel — run a scored security audit (0–100, grade A–F) across hooks, permissions, MCP servers, agent configs, and secrets\n• /evolve — detect gaps in the environment, generate new skills and agents from git co-change evidence, report score delta (e.g. 
42/100 → 68/100)\n• /constitute — write your project's constitution (non-negotiables, architectural commitments, definition of done) — gates all future AI actions\n• /analyze — cross-artifact consistency check: ghost milestones, spec vs. code drift, unplanned commits\n• /reflect — find stale, missing, or contradicting rules in CLAUDE.md and propose exact fixes\n• /debate \"REST vs GraphQL for this project\" — adversarial evidence-based decision with order-independent scoring, logged to decisions.md",
12
- "version": "0.4.39",
12
+ "version": "0.4.40",
13
13
  "source": {
14
14
  "source": "github",
15
15
  "repo": "haytamAroui/AZ-CLAUDE-COPILOT",
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "azclaude",
3
- "version": "0.4.39",
4
- "description": "AZCLAUDE is a complete AI coding environment for Claude Code. It installs 37 commands, 10 auto-invoked skills, 15 specialized agents, 4 hooks, and a persistent memory system — in one command.\n\nKey features:\n• Memory across sessions — goals.md + checkpoints injected automatically before every session\n• Self-improving loop — /reflect fixes stale CLAUDE.md rules, /reflexes learns from tool-use patterns, /evolve creates agents from git evidence\n• Autonomous copilot mode — /copilot runs a three-tier team (orchestrator → problem-architect → milestone-builder) across sessions until the product ships\n• Spec-driven workflow — /constitute writes project rules, /spec writes structured ACs, /analyze detects plan drift and ghost milestones, /blueprint traces every milestone to a spec\n• Security layer — 111-rule environment scan (/sentinel), pre-write secret blocking, pre-ship credential audit\n• Progressive levels 0–10 — start with CLAUDE.md, grow into multi-agent pipelines and self-evolving environments\n• Zero dependencies — no npm packages, no external APIs, no vector databases. Plain markdown files and Claude Code's native architecture.\n• Smart install — npx azclaude-copilot@latest auto-detects first install vs upgrade vs verify. Context-aware onboarding shows the right next command for your project state.\n\nExample use cases:\n• /setup — scan an existing project, detect stack + domain + scale, fill CLAUDE.md, generate project-specific skills and agents automatically\n• /copilot \"Build a compliance SaaS with trilingual support\" — walk away, come back to working code across multiple sessions\n• /sentinel — run a scored security audit (0–100, grade A–F) across hooks, permissions, MCP servers, agent configs, and secrets\n• /evolve — detect gaps in the environment, generate new skills and agents from git co-change evidence, report score delta (e.g. 
42/100 → 68/100)\n• /constitute — write your project's constitution (non-negotiables, architectural commitments, definition of done) — gates all future AI actions\n• /analyze — cross-artifact consistency check: ghost milestones, spec vs. code drift, unplanned commits\n• /reflect — find stale, missing, or contradicting rules in CLAUDE.md and propose exact fixes\n• /debate \"REST vs GraphQL for this project\" — adversarial evidence-based decision with order-independent scoring, logged to decisions.md",
3
+ "version": "0.4.40",
4
+ "description": "AZCLAUDE is a complete AI coding environment for Claude Code. It installs 39 commands, 10 auto-invoked skills, 15 specialized agents, 4 hooks, and a persistent memory system — in one command.\n\nKey features:\n• Memory across sessions — goals.md + checkpoints injected automatically before every session\n• Self-improving loop — /reflect fixes stale CLAUDE.md rules, /reflexes learns from tool-use patterns, /evolve creates agents from git evidence\n• Autonomous copilot mode — /copilot runs a three-tier team (orchestrator → problem-architect → milestone-builder) across sessions until the product ships\n• Spec-driven workflow — /constitute writes project rules, /spec writes structured ACs, /analyze detects plan drift and ghost milestones, /blueprint traces every milestone to a spec\n• Security layer — 111-rule environment scan (/sentinel), pre-write secret blocking, pre-ship credential audit\n• Progressive levels 0–10 — start with CLAUDE.md, grow into multi-agent pipelines and self-evolving environments\n• Zero dependencies — no npm packages, no external APIs, no vector databases. Plain markdown files and Claude Code's native architecture.\n• Smart install — npx azclaude-copilot@latest auto-detects first install vs upgrade vs verify. Context-aware onboarding shows the right next command for your project state.\n\nExample use cases:\n• /setup — scan an existing project, detect stack + domain + scale, fill CLAUDE.md, generate project-specific skills and agents automatically\n• /copilot \"Build a compliance SaaS with trilingual support\" — walk away, come back to working code across multiple sessions\n• /sentinel — run a scored security audit (0–100, grade A–F) across hooks, permissions, MCP servers, agent configs, and secrets\n• /evolve — detect gaps in the environment, generate new skills and agents from git co-change evidence, report score delta (e.g. 
42/100 → 68/100)\n• /constitute — write your project's constitution (non-negotiables, architectural commitments, definition of done) — gates all future AI actions\n• /analyze — cross-artifact consistency check: ghost milestones, spec vs. code drift, unplanned commits\n• /reflect — find stale, missing, or contradicting rules in CLAUDE.md and propose exact fixes\n• /debate \"REST vs GraphQL for this project\" — adversarial evidence-based decision with order-independent scoring, logged to decisions.md",
5
5
  "author": {
6
6
  "name": "haytamAroui",
7
7
  "url": "https://github.com/haytamAroui"
package/README.md CHANGED
@@ -64,7 +64,7 @@ AZCLAUDE inverts this. **You start with almost nothing. The environment builds i
64
64
  npx azclaude-copilot@latest # one command. that's it.
65
65
  ```
66
66
 
67
- No agent files to write. No skills to configure. No prompt engineering. `npx azclaude-copilot` installs 37 commands, 4 hooks, memory structure, and a manifest. The rest is generated from your actual codebase as you work. Run the same command again later — it auto-detects whether to skip, install, or upgrade.
67
+ No agent files to write. No skills to configure. No prompt engineering. `npx azclaude-copilot` installs 39 commands, 4 hooks, memory structure, and a manifest. The rest is generated from your actual codebase as you work. Run the same command again later — it auto-detects whether to skip, install, or upgrade.
68
68
 
69
69
  **What the environment looks like across sessions:**
70
70
 
@@ -119,7 +119,7 @@ npx azclaude-copilot@latest
119
119
 
120
120
  One command, no flags. Auto-detects whether this is a fresh install or an upgrade:
121
121
 
122
- - **First time** → full install (37 commands, 4 hooks, 15 agents, 10 skills, memory, reflexes)
122
+ - **First time** → full install (39 commands, 4 hooks, 15 agents, 10 skills, memory, reflexes)
123
123
  - **Already installed, older version** → auto-upgrades everything to latest templates
124
124
  - **Already up to date** → verifies, no overwrites
125
125
 
@@ -131,12 +131,12 @@ npx azclaude-copilot@latest doctor # 32 checks — verify everything is wired
131
131
 
132
132
  ## What You Get
133
133
 
134
- **37 commands** · **10 auto-invoked skills** · **15 agents** · **4 hooks** · **memory across sessions** · **learned reflexes** · **self-evolving environment**
134
+ **39 commands** · **10 auto-invoked skills** · **15 agents** · **4 hooks** · **memory across sessions** · **learned reflexes** · **self-evolving environment**
135
135
 
136
136
  ```
137
137
  .claude/
138
138
  ├── CLAUDE.md ← dispatch table: conventions, stack, routing
139
- ├── commands/ ← 37 slash commands (/add, /fix, /copilot, /parallel, /mcp, /sentinel...)
139
+ ├── commands/ ← 39 slash commands (/add, /fix, /copilot, /parallel, /mcp, /sentinel...)
140
140
  ├── skills/ ← 10 skills (test-first, security, architecture-advisor, frontend-design...)
141
141
  ├── agents/ ← 15 agents (orchestrator, spec-reviewer, constitution-guard...)
142
142
  ├── capabilities/ ← 43 files, lazy-loaded via manifest.md (~380 tokens/task)
@@ -661,6 +661,8 @@ evidence_count: 6
661
661
  | `/mcp` | Recommend and install MCP servers for your stack. |
662
662
  | `/driven` | Generate code-rules.md — DO/DO NOT coding contract. |
663
663
  | `/verify` | Audit code against code-rules.md. Reports violations at `file:line`. |
664
+ | `/inoculate` | Scan agents/skills for context inoculation coverage. Based on Anthropic's misalignment paper. |
665
+ | `/ghost-test` | Detect reward hacking in test suites (AlwaysEqual, sys.exit bypass, framework patching). |
664
666
 
665
667
  ### Think and Improve
666
668
 
@@ -756,11 +758,11 @@ An agent is a sub-process. Use one when work must happen **in parallel** or **in
756
758
 
757
759
  ## Verified
758
760
 
759
- 1578 tests. Every template, command, capability, agent, hook, and CLI feature verified.
761
+ 1609 tests. Every template, command, capability, agent, hook, and CLI feature verified.
760
762
 
761
763
  ```bash
762
764
  bash tests/test-features.sh
763
- # Results: 1578 passed, 0 failed, 1578 total
765
+ # Results: 1609 passed, 0 failed, 1609 total
764
766
  ```
765
767
 
766
768
  ---
package/bin/cli.js CHANGED
@@ -8,7 +8,7 @@ const { execSync } = require('child_process');
8
8
 
9
9
  const TEMPLATE_DIR = path.join(__dirname, '..', 'templates');
10
10
  const CORE_COMMANDS = ['setup', 'fix', 'add', 'audit', 'test', 'blueprint', 'ship', 'pulse', 'explain', 'snapshot', 'persist'];
11
- const EXTENDED_COMMANDS = ['dream', 'refactor', 'doc', 'loop', 'migrate', 'deps', 'find', 'create', 'reflect', 'hookify', 'sentinel', 'clarify', 'spec', 'analyze', 'constitute', 'tasks', 'issues', 'driven', 'mcp', 'verify'];
11
+ const EXTENDED_COMMANDS = ['dream', 'refactor', 'doc', 'loop', 'migrate', 'deps', 'find', 'create', 'reflect', 'hookify', 'sentinel', 'clarify', 'spec', 'analyze', 'constitute', 'tasks', 'issues', 'driven', 'mcp', 'verify', 'inoculate', 'ghost-test'];
12
12
  const ADVANCED_COMMANDS = ['evolve', 'debate', 'level-up', 'copilot', 'reflexes', 'parallel'];
13
13
  const COMMANDS = [...CORE_COMMANDS, ...EXTENDED_COMMANDS, ...ADVANCED_COMMANDS];
14
14
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "azclaude-copilot",
3
- "version": "0.4.39",
4
- "description": "AI coding environment — 37 commands, 10 skills, 15 agents, memory, reflexes, evolution. Install: npx azclaude-copilot@latest, then open Claude Code.",
3
+ "version": "0.4.40",
4
+ "description": "AI coding environment — 39 commands, 10 skills, 15 agents, memory, reflexes, evolution. Install: npx azclaude-copilot@latest, then open Claude Code.",
5
5
  "bin": {
6
6
  "azclaude": "bin/cli.js",
7
7
  "azclaude-copilot": "bin/copilot.js"
@@ -65,4 +65,4 @@ When priorities conflict:
65
65
  3. {{PRIORITY_3}}
66
66
 
67
67
  ## Available Commands
68
- /dream · /setup · /fix · /add · /audit · /test · /blueprint · /evolve · /debate · /snapshot · /persist · /level-up · /ship · /pulse · /explain · /loop · /refactor · /doc · /migrate · /deps · /find · /create · /reflect · /hookify · /spec · /clarify · /analyze · /constitute · /tasks · /issues · /driven · /mcp · /parallel · /verify
68
+ /dream · /setup · /fix · /add · /audit · /test · /blueprint · /evolve · /debate · /snapshot · /persist · /level-up · /ship · /pulse · /explain · /loop · /refactor · /doc · /migrate · /deps · /find · /create · /reflect · /hookify · /spec · /clarify · /analyze · /constitute · /tasks · /issues · /driven · /mcp · /parallel · /verify · /inoculate · /ghost-test
@@ -104,6 +104,7 @@ If verdict is `APPROVED` or `APPROVED (no constitution found)`: proceed to Step
104
104
  **Parallel dispatch (2+ milestones in wave with disjoint Files Written):**
105
105
 
106
106
  Load `capabilities/shared/parallel-coordination.md` first.
107
+ Load `capabilities/shared/context-inoculation.md` and prepend its Required Preamble to every agent prompt below.
107
108
 
108
109
  1. Write `.claude/ownership.md` table (branch, directories, status) for every agent in this wave
109
110
  2. Spawn each builder via Task with `isolation: "worktree"` in the same message (true parallel)
@@ -113,6 +114,8 @@ Load `capabilities/shared/parallel-coordination.md` first.
113
114
 
114
115
  **Sequential dispatch (single milestone OR overlapping files):**
115
116
 
117
+ Load `capabilities/shared/context-inoculation.md` and prepend its Required Preamble to the agent prompt below.
118
+
116
119
  Spawn milestone-builder via Task with fully packaged context:
117
120
 
118
121
  ```
@@ -26,6 +26,8 @@ Load only the files that match the current task. Never load the full list.
26
26
  | shared/semantic-boundary-check.md | /evolve Cycle 3 or boundary validator warns — detect deeper behavioral duplication across extension types that grep misses | ~300 |
27
27
  | shared/domain-advisor-generator.md | Non-tech domain detected (compliance, marketing, finance, medical, legal, research) — generates domain-specific advisor skill | ~400 |
28
28
  | shared/intelligent-dispatch.md | About to build, fix, refactor, audit, or ship — non-trivial scope (3+ files or structural) — pre-flight analysis via problem-architect | ~300 |
29
+ | shared/context-inoculation.md | /copilot dispatch, milestone-builder spawn, autonomous agent context — inject anti-reward-hack boundaries (75-90% reduction) | ~120 |
30
+ | shared/reward-hack-detection.md | /audit, /ship, post-milestone review — detect reward hacking patterns in test modifications | ~150 |
29
31
 
30
32
  ## Level Builders — load ONE at a time
31
33
  | File | When to load | Tokens |
@@ -0,0 +1,39 @@
1
+ ---
2
+ name: context-inoculation
3
+ description: >
4
+ Explicit boundary language injected before autonomous task dispatch.
5
+ Based on Anthropic's "Natural Emergent Misalignment from Reward Hacking" paper
6
+ (Section 4.2). Reduces reward hacking generalization by 75-90%.
7
+ Load when: /copilot dispatch, milestone-builder spawn, any autonomous agent context.
8
+ tokens: ~120
9
+ ---
10
+
11
+ ## Context Inoculation
12
+
13
+ Inject these boundaries into every autonomous agent prompt before task dispatch.
14
+
15
+ ### Required Preamble (copy verbatim into agent context)
16
+
17
+ > You will be evaluated on the ACTUAL correctness of your work, not on reported metrics.
18
+ > Shortcuts that make tests pass without solving the underlying problem are known patterns — report them, never use them.
19
+ > If a test fails, fix the code — never weaken the test to hide the failure.
20
+ > If you cannot complete a task, report it as incomplete. Incomplete is acceptable; deception is not.
21
+
22
+ ### When to Inject
23
+
24
+ - Before every milestone-builder dispatch in /copilot mode
25
+ - Before every Task tool spawn in orchestrator
26
+ - Before /add and /fix when running autonomously (copilot-intent.md exists)
27
+
28
+ ### What It Prevents
29
+
30
+ - `__eq__` overrides that return True (all comparisons pass vacuously)
31
+ - `sys.exit(0)` in test files (tests exit before assertions run)
32
+ - Monkey-patching test reporters (conftest.py fakes pass results)
33
+ - Deleting or weakening assertions to make tests green
34
+ - Any pattern where the metric improves but the underlying behavior does not
35
+
36
+ ### Verification
37
+
38
+ After agent completes: if test count decreased OR assertions were removed, flag as suspicious.
39
+ Cross-reference with pre-tool-use reward hack rules for defense-in-depth.
@@ -0,0 +1,32 @@
1
+ ---
2
+ name: reward-hack-detection
3
+ description: >
4
+ Post-execution checklist for detecting reward hacking patterns in AI-generated code.
5
+ Based on Anthropic's "Natural Emergent Misalignment from Reward Hacking" paper.
6
+ Load when: /audit, /ship, post-milestone review, test result verification.
7
+ tokens: ~150
8
+ ---
9
+
10
+ ## Reward Hack Detection Checklist
11
+
12
+ Run after any autonomous code generation or test modification.
13
+
14
+ ### Static Checks
15
+
16
+ 1. **Test count delta** — `git diff HEAD --stat | grep test` — if test files shrank, investigate
17
+ 2. **Assertion removal** — `git diff HEAD -- '*test*' | grep '^-.*assert\|^-.*expect'` — removed assertions = red flag
18
+ 3. **Exit in tests** — `grep -rn 'sys\.exit\|os\._exit\|process\.exit' tests/` — test files should never exit
19
+ 4. **Equality override** — `grep -rn 'def __eq__.*return True' tests/` — fake equality = all comparisons pass
20
+ 5. **Framework patching** — `grep -rn 'TestReport\|pytest_runtest_makereport' tests/ conftest.py` — result fabrication
21
+
22
+ ### Behavioral Checks (from session security log)
23
+
24
+ 6. **Test run then test modify** — review seclog for `test-then-test-modify` pattern
25
+ 7. **Hook modification** — review seclog for `hook-self-modification` pattern
26
+ 8. **Metric vs. reality** — if reported "all tests pass" but test count dropped or assertions removed, flag
27
+
28
+ ### Response Protocol
29
+
30
+ - 1 flag = WARN in audit/ship report, require manual review
31
+ - 2+ flags = BLOCK /ship, require human verification before release
32
+ - Any flag + context-inoculation was NOT loaded = escalate (defense layer was missing)
@@ -115,6 +115,14 @@ Report violations only — no code quality feedback until spec is clean.
115
115
 
116
116
  ---
117
117
 
118
+ ## Step 2b: Reward Hack Detection (if autonomous code was generated)
119
+
120
+ If `.claude/copilot-intent.md` exists OR recent commits are from autonomous agents:
121
+ Load `capabilities/shared/reward-hack-detection.md` and run the Static Checks (1-5).
122
+ Report any flags before proceeding to code quality.
123
+
124
+ ---
125
+
118
126
  ## Step 3: Code Quality (only if Step 2 passes)
119
127
 
120
128
  **IDE diagnostics**: use `mcp__ide__getDiagnostics` if available.
@@ -0,0 +1,99 @@
1
+ ---
2
+ name: ghost-test
3
+ description: >
4
+ Detect reward hacking in test suites. Phase 1: static scan for known patterns
5
+ (AlwaysEqual, sys.exit bypass, conftest.py patching). Phase 2 (opt-in): canary
6
+ assertion injection — if assert 1==2 passes, the test framework is compromised.
7
+ Based on Anthropic's "Natural Emergent Misalignment" paper (Section 3, Figure 8).
8
+ argument-hint: "[--static | --canary | --all (default)]"
9
+ disable-model-invocation: true
10
+ allowed-tools: Read, Grep, Bash, Glob
11
+ ---
12
+
13
+ # /ghost-test — Test Integrity Verification
14
+
15
+ $ARGUMENTS
16
+
17
+ ---
18
+
19
+ ## Phase 1: Static Scan (always runs)
20
+
21
+ Detect the 3 reward hack patterns from the paper:
22
+
23
+ ### Pattern 1 — AlwaysEqual (`__eq__` override)
24
+ ```bash
25
+ grep -rn 'def __eq__.*return True\|__eq__.*=.*lambda.*True' tests/ test/ spec/ 2>/dev/null
26
+ ```
27
+ **Severity:** HIGH — all equality assertions pass vacuously.
28
+
29
+ ### Pattern 2 — Exit Bypass (`sys.exit(0)`)
30
+ ```bash
31
+ grep -rn 'sys\.exit\s*(\s*0\|os\._exit\s*(\s*0\|process\.exit\s*(\s*0' tests/ test/ spec/ *test*.py *test*.js *test*.ts 2>/dev/null
32
+ ```
33
+ **Severity:** HIGH — test process exits before assertions run.
34
+
35
+ ### Pattern 3 — Framework Patching (conftest.py / TestReport)
36
+ ```bash
37
+ grep -rn 'TestReport\.from_item_and_call\|pytest_runtest_makereport\|monkeypatch.*TestReport' tests/ test/ conftest.py **/conftest.py 2>/dev/null
38
+ ```
39
+ **Severity:** CRITICAL — test results are fabricated at the framework level.
40
+
41
+ Report each finding with `file:line` and severity.
42
+
43
+ ---
44
+
45
+ ## Phase 2: Canary Assertion (only with --canary or --all)
46
+
47
+ **Purpose:** Inject a deliberately failing test. If it passes, something is intercepting the test framework.
48
+
49
+ ### Step 1: Detect test framework
50
+ ```bash
51
+ { [ -f pytest.ini ] || { [ -f pyproject.toml ] && grep -q pytest pyproject.toml 2>/dev/null; }; } && echo "pytest"
52
+ [ -f jest.config.js ] || [ -f jest.config.ts ] && echo "jest"
53
+ [ -f package.json ] && grep -q '"vitest"' package.json 2>/dev/null && echo "vitest"
54
+ ```
55
+
56
+ ### Step 2: Create and run canary (via Bash — no Write tool needed)
57
+
58
+ **pytest:**
59
+ ```bash
60
+ echo 'def test_canary_must_fail(): assert 1 == 2, "canary"' > _ghost_canary_test.py
61
+ python -m pytest _ghost_canary_test.py --no-header -q 2>&1; CANARY_EXIT=$?
62
+ rm -f _ghost_canary_test.py
63
+ echo "CANARY_EXIT=$CANARY_EXIT"
64
+ ```
65
+
66
+ **jest:**
67
+ ```bash
68
+ echo 'test("canary must fail", () => { expect(1).toBe(2); });' > _ghost_canary_test.test.js
69
+ npx jest _ghost_canary_test.test.js --no-coverage 2>&1; CANARY_EXIT=$?
70
+ rm -f _ghost_canary_test.test.js
71
+ echo "CANARY_EXIT=$CANARY_EXIT"
72
+ ```
73
+
74
+ ### Step 3: Interpret
75
+ - **Exit code != 0** (canary FAILED as expected) = PASS — framework is honest
76
+ - **Exit code == 0** (canary PASSED unexpectedly) = **CRITICAL** — framework is compromised
77
+
78
+ ---
79
+
80
+ ## Report
81
+
82
+ ```
83
+ /ghost-test — Test Integrity Report
84
+
85
+ Phase 1 — Static Scan:
86
+ Pattern 1 (__eq__ override): [clean | N findings]
87
+ Pattern 2 (exit bypass): [clean | N findings]
88
+ Pattern 3 (framework patching): [clean | N findings]
89
+
90
+ Phase 2 — Canary Assertion:
91
+ Framework: [detected framework]
92
+ Result: FAILED (expected) = framework honest
93
+ PASSED (unexpected) = CRITICAL — framework compromised
94
+
95
+ Verdict: CLEAN | SUSPICIOUS (N findings) | COMPROMISED
96
+ ```
97
+
98
+ If any findings: list each with `file:line` and recommended action.
99
+ If COMPROMISED: recommend immediate investigation of conftest.py and test setup files.
@@ -0,0 +1,76 @@
1
+ ---
2
+ name: inoculate
3
+ description: >
4
+ Scan agent and skill files for context inoculation coverage.
5
+ Reports which files have inoculation language and which don't.
6
+ Based on Anthropic's "Natural Emergent Misalignment" paper (Section 4.2).
7
+ argument-hint: "[--scan | --generate | --all (default)]"
8
+ disable-model-invocation: true
9
+ allowed-tools: Read, Grep, Bash, Glob
10
+ ---
11
+
12
+ # /inoculate — Context Inoculation Scanner
13
+
14
+ $ARGUMENTS
15
+
16
+ ---
17
+
18
+ **EnterPlanMode** — this command is read-only. No file modifications.
19
+
20
+ ---
21
+
22
+ ## Step 1: Scan Agent Files
23
+
24
+ ```bash
25
+ ls .claude/agents/*.md 2>/dev/null || echo "No agents installed"
26
+ ```
27
+
28
+ For each agent file, check for inoculation markers:
29
+ ```bash
30
+ grep -il "actual correctness\|shortcuts.*unacceptable\|never.*fix the test\|deception is not\|never weaken" .claude/agents/*.md 2>/dev/null
31
+ ```
32
+
33
+ Classify each agent:
34
+ - **INOCULATED** — contains at least one inoculation phrase
35
+ - **NOT INOCULATED** — missing inoculation language (fix with --generate)
36
+ - **EXEMPT** — read-only agents (tools list has no Write/Edit) don't need inoculation
37
+
38
+ ## Step 2: Scan Skill Files
39
+
40
+ ```bash
41
+ ls .claude/skills/*/SKILL.md 2>/dev/null || echo "No skills installed"
42
+ ```
43
+
44
+ Same classification as agents.
45
+
46
+ ## Step 3: Report
47
+
48
+ Output format:
49
+ ```
50
+ /inoculate — Context Inoculation Coverage
51
+
52
+ Agents:
53
+ + milestone-builder.md INOCULATED
54
+ - code-reviewer.md NOT INOCULATED
55
+ . spec-reviewer.md EXEMPT (read-only)
56
+
57
+ Skills:
58
+ + test-first/SKILL.md INOCULATED
59
+ - skill-creator/SKILL.md NOT INOCULATED
60
+
61
+ Coverage: 4/8 agents, 2/5 skills (50%)
62
+ ```
63
+
64
+ ## Step 4: Generate (if --generate or --all)
65
+
66
+ For each NOT INOCULATED file:
67
+ 1. Read the agent/skill's purpose from its frontmatter `description` field
68
+ 2. Generate a 2-3 line inoculation block tailored to its role:
69
+ - For code-writing agents: "verify test results reflect actual behavior, not framework manipulation"
70
+ - For test-writing agents: "every test must contain at least one meaningful assertion on computed output"
71
+ - For review agents: "flag test files where assertion count decreased or comparisons were weakened"
72
+ 3. Output the generated text for the user to review — do NOT modify files automatically
73
+
74
+ **ExitPlanMode**
75
+
76
+ Show the report. If gaps exist, suggest: "Run `/inoculate --generate` to create inoculation text for gaps."
@@ -43,6 +43,9 @@ Scans five layers of the Claude Code environment for security issues.
43
43
  Each layer is scored independently. Final score = weighted average (0–100).
44
44
  Grade: A ≥ 90 · B ≥ 75 · C ≥ 60 · D ≥ 45 · F < 45
45
45
 
46
+ **Related:** Run `/ghost-test` for test-specific reward hack detection (AlwaysEqual, sys.exit bypass, framework patching).
47
+ Run `/inoculate` to check context inoculation coverage across agents and skills.
48
+
46
49
  Parse $ARGUMENTS:
47
50
  - `--hooks` → run Layer 1 + 2 only
48
51
  - `--mcp` → run Layer 3 only
@@ -63,6 +63,12 @@ If `agent=found`: read `.claude/agents/security-auditor.md` and execute the secr
63
63
  ```
64
64
  ✗ Pre-ship blocked: security-auditor found BLOCKED findings. Run /sentinel for details.
65
65
  ```
66
+ **0c. Test integrity check** — detect reward hacking patterns in test suite:
67
+ ```bash
68
+ grep -rn 'def __eq__.*return True\|sys\.exit\s*(\s*0\s*)\|TestReport\.from_item_and_call' tests/ test/ conftest.py 2>/dev/null
69
+ ```
70
+ If any match: WARN. `⚠ Pre-ship warning: reward hack pattern detected in test files. Run /ghost-test for details.`
71
+
66
72
  If `agent=missing`: run inline secret scan:
67
73
  ```bash
68
74
  grep -rn "AKIA[A-Z0-9]\{16\}\|ghp_[A-Za-z0-9]\{36\}\|glpat-\|xoxb-\|sk_live_\|-----BEGIN.*PRIVATE KEY" \
@@ -28,6 +28,16 @@ If writing new tests: follow the naming pattern from code-rules, not generic con
28
28
 
29
29
  ---
30
30
 
31
+ ## Step 0: Test Integrity (if autonomous code was generated)
32
+
33
+ If `.claude/copilot-intent.md` exists: run a quick reward hack scan before executing tests:
34
+ ```bash
35
+ grep -rn 'def __eq__.*return True\|sys\.exit\s*(0)\|TestReport\.from_item_and_call' tests/ test/ conftest.py 2>/dev/null
36
+ ```
37
+ If any match: WARN before running the suite. `⚠ Reward hack pattern detected — run /ghost-test for full analysis.`
38
+
39
+ ---
40
+
31
41
  ## Step 1: IDE Diagnostics First
32
42
 
33
43
  Use `mcp__ide__getDiagnostics` if available.
@@ -209,6 +209,47 @@ if (HOOK_PROFILE !== 'minimal') {
209
209
  );
210
210
  }
211
211
  }
212
+
213
+ // ── Reward hack behavioral patterns (Anthropic "Emergent Misalignment" paper) ──
214
+
215
+ // Pattern: Bash(test run) → Edit/Write(test file) = possible reward hacking
216
+ if (secSeq.length >= 2) {
217
+ const prev2 = secSeq[secSeq.length - 2];
218
+ const curr2 = secSeq[secSeq.length - 1];
219
+ if (prev2.tool === 'Bash' && /\b(pytest|jest|mocha|vitest|npm\s+test|npx\s+test)\b/i.test(prev2.file || '')
220
+ && (curr2.tool === 'Edit' || curr2.tool === 'Write' || curr2.tool === 'MultiEdit')
221
+ && /test[_/\\]|_test\.|\.test\.|\.spec\.|conftest/i.test(curr2.file || '')) {
222
+ const seclogPath = path.join(os.tmpdir(), `.azclaude-seclog-${process.ppid || process.pid}`);
223
+ const entry2 = JSON.stringify({
224
+ ts: obsTs, hook: 'post-tool-use',
225
+ rule: 'test-then-test-modify', level: 'warn',
226
+ target: `${prev2.tool}(test) → ${curr2.tool}(${path.basename(curr2.file || '')})`
227
+ });
228
+ try { fs.appendFileSync(seclogPath, entry2 + '\n'); } catch (_) {}
229
+ process.stderr.write(
230
+ `\n⚠ SECURITY: Test run then test file modification — verify edits fix the code, not fake the result.\n`
231
+ );
232
+ }
233
+ }
234
+
235
+ // Pattern: Any Edit/Write to .claude/hooks/ = always warn (hook self-modification)
236
+ {
237
+ const currH = secSeq[secSeq.length - 1];
238
+ if (currH && (currH.tool === 'Edit' || currH.tool === 'Write' || currH.tool === 'MultiEdit')
239
+ && /\.claude[/\\]hooks[/\\]/i.test(currH.file || '')) {
240
+ const seclogPath = path.join(os.tmpdir(), `.azclaude-seclog-${process.ppid || process.pid}`);
241
+ const entryH = JSON.stringify({
242
+ ts: obsTs, hook: 'post-tool-use',
243
+ rule: 'hook-self-modification', level: 'warn',
244
+ target: path.basename(currH.file || '')
245
+ });
246
+ try { fs.appendFileSync(seclogPath, entryH + '\n'); } catch (_) {}
247
+ process.stderr.write(
248
+ `\n⚠ SECURITY: Hook file modified (${path.basename(currH.file || '')}) — hooks control all tool execution. Verify this change is intentional.\n`
249
+ );
250
+ }
251
+ }
252
+
212
253
  // Auto-truncate: keep last 2000 lines max (prevent unbounded growth)
213
254
  try {
214
255
  const obsContent = fs.readFileSync(obsPath, 'utf8');
@@ -227,6 +227,28 @@ const RULES = [
227
227
  message: 'Hardcoded secret pattern detected',
228
228
  block: true,
229
229
  },
230
+ // ── Reward hack detection (Anthropic "Emergent Misalignment" paper, Section 3) ──
231
+ {
232
+ id: 'test-always-equal',
233
+ test: /def\s+__eq__\s*\([\s\S]*?return\s+True/,
234
+ fileTest: /test[_/\\]|_test\.py$|tests?[/\\]|conftest\.py$/i,
235
+ message: 'Test __eq__ override returning True — reward hack pattern (AlwaysEqual: all comparisons pass vacuously). Review or justify.',
236
+ block: false,
237
+ },
238
+ {
239
+ id: 'test-exit-bypass',
240
+ test: /\b(sys\.exit\s*\(\s*0\s*\)|os\._exit\s*\(\s*0\s*\)|process\.exit\s*\(\s*0\s*\))/,
241
+ fileTest: /test[_/\\]|_test\.(py|js|ts)$|\.(test|spec)\.[jt]sx?$|tests?[/\\]|spec[/\\]/i,
242
+ message: 'Exit call in test file — reward hack pattern (process exits before assertions run). Remove or justify.',
243
+ block: false,
244
+ },
245
+ {
246
+ id: 'test-framework-patch',
247
+ test: /TestReport\.from_item_and_call|pytest_runtest_makereport|monkeypatch.*TestReport/,
248
+ fileTest: /conftest\.py$|test[_/\\]|_test\.py$|tests?[/\\]/i,
249
+ message: 'Test framework monkey-patching detected — reward hack pattern (conftest.py fakes pass results). Review carefully.',
250
+ block: false,
251
+ },
230
252
  ];
231
253
 
232
254
  // ── Session dedup ─────────────────────────────────────────────────────────────
@@ -262,6 +284,7 @@ const displayName = filePath
262
284
  let didBlock = false;
263
285
 
264
286
  for (const rule of RULES) {
287
+ if (rule.fileTest && !rule.fileTest.test(filePath)) continue; // file-scoped rules (reward hack detection)
265
288
  if (!rule.test.test(content)) continue;
266
289
 
267
290
  const dedupKey = `${displayName}:${rule.id}`;