oh-my-customcode 0.37.2 → 0.39.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/README.md +20 -5
  2. package/dist/cli/index.js +1189 -99
  3. package/dist/index.js +4 -1
  4. package/package.json +3 -1
  5. package/templates/.claude/hooks/hooks.json +24 -12
  6. package/templates/.claude/hooks/scripts/agent-teams-advisor.sh +3 -0
  7. package/templates/.claude/hooks/scripts/audit-log.sh +3 -0
  8. package/templates/.claude/hooks/scripts/content-hash-validator.sh +3 -0
  9. package/templates/.claude/hooks/scripts/context-budget-advisor.sh +4 -1
  10. package/templates/.claude/hooks/scripts/eval-core-batch-save.sh +23 -0
  11. package/templates/.claude/hooks/scripts/git-delegation-guard.sh +3 -0
  12. package/templates/.claude/hooks/scripts/model-escalation-advisor.sh +3 -0
  13. package/templates/.claude/hooks/scripts/schema-validator.sh +3 -0
  14. package/templates/.claude/hooks/scripts/secret-filter.sh +3 -0
  15. package/templates/.claude/hooks/scripts/session-env-check.sh +20 -0
  16. package/templates/.claude/hooks/scripts/stuck-detector.sh +4 -1
  17. package/templates/.claude/hooks/scripts/task-outcome-recorder.sh +16 -1
  18. package/templates/.claude/rules/MAY-optimization.md +12 -0
  19. package/templates/.claude/rules/MUST-agent-design.md +45 -7
  20. package/templates/.claude/rules/MUST-completion-verification.md +81 -0
  21. package/templates/.claude/rules/SHOULD-memory-integration.md +81 -0
  22. package/templates/.claude/skills/de-lead-routing/SKILL.md +8 -92
  23. package/templates/.claude/skills/deep-plan/SKILL.md +55 -4
  24. package/templates/.claude/skills/dev-lead-routing/SKILL.md +9 -21
  25. package/templates/.claude/skills/dev-refactor/SKILL.md +34 -1
  26. package/templates/.claude/skills/evaluator-optimizer/SKILL.md +53 -0
  27. package/templates/.claude/skills/qa-lead-routing/SKILL.md +7 -242
  28. package/templates/.claude/skills/research/SKILL.md +74 -7
  29. package/templates/.claude/skills/sauron-watch/SKILL.md +81 -0
  30. package/templates/.claude/skills/secretary-routing/SKILL.md +3 -18
  31. package/templates/.claude/skills/structured-dev-cycle/SKILL.md +20 -3
  32. package/templates/guides/claude-code/index.yaml +5 -0
  33. package/templates/manifest.json +3 -3
  34. package/templates/.claude/hooks/scripts/session-compliance-report.sh +0 -65
package/dist/index.js CHANGED
@@ -544,7 +544,9 @@ function getDefaultConfig() {
544
544
  ".claude/agent-memory/",
545
545
  ".claude/agent-memory-local/"
546
546
  ],
547
- customComponents: []
547
+ customComponents: [],
548
+ domain: undefined,
549
+ teamMode: false
548
550
  };
549
551
  }
550
552
  function getDefaultPreferences() {
@@ -1387,6 +1389,7 @@ async function installEntryDocWithTracking(targetDir, options, result) {
1387
1389
  async function updateInstallConfig(targetDir, options, installedComponents) {
1388
1390
  const config = await loadConfig(targetDir);
1389
1391
  config.language = options.language ?? DEFAULT_LANGUAGE;
1392
+ config.domain = options.domain;
1390
1393
  config.installedAt = new Date().toISOString();
1391
1394
  config.installedComponents = installedComponents;
1392
1395
  await saveConfig(targetDir, config);
package/package.json CHANGED
@@ -1,6 +1,7 @@
1
1
  {
2
2
  "name": "oh-my-customcode",
3
- "version": "0.37.2",
3
+ "workspaces": ["packages/*"],
4
+ "version": "0.39.0",
4
5
  "description": "Batteries-included agent harness for Claude Code",
5
6
  "type": "module",
6
7
  "bin": {
@@ -41,6 +42,7 @@
41
42
  "prepublishOnly": "bun run build && bun run test"
42
43
  },
43
44
  "dependencies": {
45
+ "@clack/prompts": "^1.1.0",
44
46
  "commander": "^14.0.2",
45
47
  "i18next": "^25.8.0",
46
48
  "yaml": "^2.8.2"
@@ -141,6 +141,18 @@
141
141
  "description": "Record agent outcomes on subagent completion (complements PostToolUse Agent matcher)"
142
142
  }
143
143
  ],
144
+ "PostCompact": [
145
+ {
146
+ "matcher": "*",
147
+ "hooks": [
148
+ {
149
+ "type": "prompt",
150
+ "prompt": "Context was just compacted. MANDATORY post-compaction steps: 1) All enforced rules (R007 Agent ID, R008 Tool ID, R009 Parallel, R010 Orchestrator, R018 Agent Teams) remain ACTIVE — compaction does NOT disable rules. 2) Your next response MUST include agent identification per R007. 3) Continue delegating ALL file modifications to subagents per R010. 4) Re-read CLAUDE.md if needed to refresh project-specific context."
151
+ }
152
+ ],
153
+ "description": "Reinforce enforced rules after context compaction — prevents rule amnesia (v2.1.76+)"
154
+ }
155
+ ],
144
156
  "PostToolUse": [
145
157
  {
146
158
  "matcher": "tool == \"Bash\"",
@@ -212,16 +224,6 @@
212
224
  ],
213
225
  "description": "Type check Python files with ty after edits"
214
226
  },
215
- {
216
- "matcher": "tool == \"Task\" || tool == \"Agent\"",
217
- "hooks": [
218
- {
219
- "type": "command",
220
- "command": "bash .claude/hooks/scripts/task-outcome-recorder.sh"
221
- }
222
- ],
223
- "description": "Record agent/task outcomes (success/failure) for model escalation decisions"
224
- },
225
227
  {
226
228
  "matcher": "tool == \"Edit\" || tool == \"Write\" || tool == \"Bash\" || tool == \"Task\" || tool == \"Agent\" || tool == \"Read\" || tool == \"Glob\" || tool == \"Grep\"",
227
229
  "hooks": [
@@ -294,15 +296,25 @@
294
296
  ],
295
297
  "description": "Final console.log audit and session diagnostics before session ends"
296
298
  },
299
+ {
300
+ "matcher": "*",
301
+ "hooks": [
302
+ {
303
+ "type": "command",
304
+ "command": "bash .claude/hooks/scripts/eval-core-batch-save.sh"
305
+ }
306
+ ],
307
+ "description": "Batch-save agent outcomes to eval-core DB on session end (advisory, exit 0)"
308
+ },
297
309
  {
298
310
  "matcher": "*",
299
311
  "hooks": [
300
312
  {
301
313
  "type": "prompt",
302
- "prompt": "Session-end memory checkpoint (R011 enforcement). Check conversation history for these 2 steps: 1) sys-memory-keeper was delegated to update MEMORY.md 2) claude-mem save was attempted via ToolSearch + mcp__plugin_claude-mem_mcp-search__save_memory. Note: episodic-memory auto-indexes after session — no manual verification needed. Decision rules: If BOTH were attempted (success or failure both count): approve. If MCP tools are unavailable after ToolSearch attempt: approve with note. If session had no explicit session-end signal from user (quick question, no memory work): approve. If any step was NOT attempted despite user signaling session end: block with systemMessage listing the missing steps."
314
+ "prompt": "Session-end memory checkpoint (R011 enforcement). Check if the session-end memory save workflow was completed: 1) sys-memory-keeper was delegated to update MEMORY.md 2) claude-mem save was attempted via ToolSearch + mcp__plugin_claude-mem_mcp-search__save_memory. Note: episodic-memory auto-indexes after session — no manual action needed. Decision rules (evaluate in order): 1. If session had no explicit session-end signal (quick question, no memory work): APPROVE immediately. 2. If BOTH steps were already attempted (success or failure both count): APPROVE. 3. If MCP tools unavailable after ToolSearch attempt: APPROVE with note. 4. If steps were NOT yet performed: PERFORM THEM NOW (maximum 1 attempt each): a. Delegate to sys-memory-keeper to update MEMORY.md — call Agent tool with sys-memory-keeper. b. Attempt claude-mem save — ToolSearch for save_memory tool, then call it. c. After completion (success or failure), APPROVE stop. IMPORTANT: Do NOT block indefinitely. After 1 attempt per step, always APPROVE. IMPORTANT: Do NOT loop — if you already performed these steps in this same stop hook evaluation, APPROVE."
303
315
  }
304
316
  ],
305
- "description": "Enforce R011 session-end memory saves — block stop if claude-mem or episodic-memory saves were skipped"
317
+ "description": "Enforce R011 session-end memory saves — auto-perform if not yet done (episodic-memory auto-indexes)"
306
318
  }
307
319
  ]
308
320
  }
@@ -1,6 +1,9 @@
1
1
  #!/bin/bash
2
2
  set -euo pipefail
3
3
 
4
+ # Dependency check: exit silently if jq not available
5
+ command -v jq >/dev/null 2>&1 || exit 0
6
+
4
7
  # Agent Teams Advisor Hook
5
8
  # Trigger: PreToolUse, tool == "Task" || tool == "Agent"
6
9
  # Purpose: Track Agent/Task tool usage count per session and warn when Agent Teams may be more appropriate
@@ -7,6 +7,9 @@
7
7
 
8
8
  set -euo pipefail
9
9
 
10
+ # Dependency check: exit silently if jq not available
11
+ command -v jq >/dev/null 2>&1 || exit 0
12
+
10
13
  input=$(cat)
11
14
 
12
15
  # Extract fields from hook input
@@ -7,6 +7,9 @@
7
7
 
8
8
  set -euo pipefail
9
9
 
10
+ # Dependency check: exit silently if jq not available
11
+ command -v jq >/dev/null 2>&1 || exit 0
12
+
10
13
  input=$(cat)
11
14
 
12
15
  # Hash store (PPID-scoped, session-only)
@@ -1,6 +1,9 @@
1
1
  #!/bin/bash
2
2
  set -euo pipefail
3
3
 
4
+ # Dependency check: exit silently if jq not available
5
+ command -v jq >/dev/null 2>&1 || exit 0
6
+
4
7
  # Context Budget Advisor Hook
5
8
  # Trigger: PostToolUse (Edit/Write/Agent/Task/Read/Glob/Grep/Bash)
6
9
  # Purpose: Monitor context usage and advise ecomode activation based on task type
@@ -29,7 +32,7 @@ read_count=${read_count:-0}
29
32
  agent_count=${agent_count:-0}
30
33
 
31
34
  # Determine tool type from input
32
- TOOL=$(echo "$input" | jq -r '.tool // ""' 2>/dev/null || echo "")
35
+ TOOL=$(echo "$input" | jq -r '.tool_name // ""' 2>/dev/null || echo "")
33
36
  tool_count=$((tool_count + 1))
34
37
 
35
38
  case "$TOOL" in
@@ -0,0 +1,23 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+
4
+ # Eval-Core Batch Save on Session End (Advisory Only)
5
+ # Trigger: Stop hook
6
+ # Purpose: Auto-collect eval metrics on session end via eval-core CLI
7
+ # Protocol: stdin JSON -> process -> stdout pass-through, exit 0 always
8
+ #
9
+ # This hook is advisory-only and never blocks session termination.
10
+ # If eval-core is unavailable or collection fails, the session continues normally.
11
+
12
+ input=$(cat)
13
+ PPID_FILE="/tmp/.claude-task-outcomes-${PPID}"
14
+
15
+ # Only attempt collection if outcome file exists and eval-core is available
16
+ if [ -f "$PPID_FILE" ] && command -v eval-core >/dev/null 2>&1; then
17
+ echo "[Hook] Collecting eval metrics via eval-core..." >&2
18
+ eval-core collect --ppid "$PPID" 2>/dev/null || true
19
+ fi
20
+
21
+ # Always pass through input and exit 0 (advisory only)
22
+ echo "$input"
23
+ exit 0
@@ -3,6 +3,9 @@
3
3
  # Warns when git operations are delegated to a non-mgr-gitnerd agent via Agent/Task tool.
4
4
  # WARN only - does NOT block (exit 0, passes input through).
5
5
 
6
+ # Dependency check: exit silently if jq not available
7
+ command -v jq >/dev/null 2>&1 || exit 0
8
+
6
9
  input=$(cat)
7
10
 
8
11
  agent_type=$(echo "$input" | jq -r '.tool_input.subagent_type // ""')
@@ -1,6 +1,9 @@
1
1
  #!/bin/bash
2
2
  set -euo pipefail
3
3
 
4
+ # Dependency check: exit silently if jq not available
5
+ command -v jq >/dev/null 2>&1 || exit 0
6
+
4
7
  # Model Escalation Advisor Hook
5
8
  # Trigger: PreToolUse, tool == "Task" || tool == "Agent"
6
9
  # Purpose: Advise model escalation when failure patterns detected
@@ -7,6 +7,9 @@
7
7
 
8
8
  set -euo pipefail
9
9
 
10
+ # Dependency check: exit silently if jq not available
11
+ command -v jq >/dev/null 2>&1 || exit 0
12
+
10
13
  input=$(cat)
11
14
 
12
15
  # Extract tool info
@@ -7,6 +7,9 @@
7
7
 
8
8
  set -euo pipefail
9
9
 
10
+ # Dependency check: exit silently if jq not available
11
+ command -v jq >/dev/null 2>&1 || exit 0
12
+
10
13
  input=$(cat)
11
14
 
12
15
  # Extract output to scan
@@ -82,6 +82,20 @@ if command -v git >/dev/null 2>&1 && git rev-parse --is-inside-work-tree >/dev/n
82
82
  fi
83
83
  fi
84
84
 
85
+ # --- CI Status Check ---
86
+ # Check last CI run status if gh CLI is available
87
+ if command -v gh &>/dev/null; then
88
+ ci_status=$(gh run list --limit 1 --json conclusion -q '.[0].conclusion' 2>/dev/null || echo "unknown")
89
+ ci_name=$(gh run list --limit 1 --json name -q '.[0].name' 2>/dev/null || echo "unknown")
90
+ if [ "$ci_status" = "failure" ]; then
91
+ echo "[Session] ⚠ WARNING: Last CI run FAILED (${ci_name}) — check before pushing" >&2
92
+ elif [ "$ci_status" = "success" ]; then
93
+ echo "[Session] CI: last run passed (${ci_name})" >&2
94
+ elif [ "$ci_status" != "unknown" ]; then
95
+ echo "[Session] CI: last run status: ${ci_status} (${ci_name})" >&2
96
+ fi
97
+ fi
98
+
85
99
  # Update availability check (local cache only — no network calls)
86
100
  OMCUSTOM_UPDATE_STATUS="unknown"
87
101
  INSTALLED_VERSION=""
@@ -173,6 +187,12 @@ case "$DRIFT_STATUS" in
173
187
  esac
174
188
  echo "------------------------------------" >&2
175
189
 
190
+ # SessionEnd hooks timeout (v2.1.74+)
191
+ if [ -z "${CLAUDE_CODE_SESSIONEND_HOOKS_TIMEOUT_MS:-}" ]; then
192
+ echo "[SessionEnv] ⚠ CLAUDE_CODE_SESSIONEND_HOOKS_TIMEOUT_MS not set (default: 1500ms)" >&2
193
+ echo "[SessionEnv] Recommend: export CLAUDE_CODE_SESSIONEND_HOOKS_TIMEOUT_MS=10000" >&2
194
+ fi
195
+
176
196
  # Update Check report
177
197
  echo "" >&2
178
198
  echo " [Update Check]" >&2
@@ -1,6 +1,9 @@
1
1
  #!/bin/bash
2
2
  set -euo pipefail
3
3
 
4
+ # Dependency check: exit silently if jq not available
5
+ command -v jq >/dev/null 2>&1 || exit 0
6
+
4
7
  # Stuck Detector Hook
5
8
  # Trigger: PostToolUse, tool matches "Edit|Write|Bash|Task|Agent"
6
9
  # Purpose: Detect repetitive failure loops and advise recovery
@@ -9,7 +12,7 @@ set -euo pipefail
9
12
  # - exit 1: hard block (extreme stuck loops, >= HARD_BLOCK_THRESHOLD repetitions)
10
13
 
11
14
  # Hard block threshold: consecutive identical operations before blocking
12
- HARD_BLOCK_THRESHOLD=5
15
+ HARD_BLOCK_THRESHOLD=${CLAUDE_STUCK_THRESHOLD:-3}
13
16
 
14
17
  input=$(cat)
15
18
 
@@ -1,6 +1,9 @@
1
1
  #!/bin/bash
2
2
  set -euo pipefail
3
3
 
4
+ # Dependency check: exit silently if jq not available
5
+ command -v jq >/dev/null 2>&1 || exit 0
6
+
4
7
  # Task/Agent Outcome Recorder Hook
5
8
  # Trigger: PostToolUse (tool == "Task" || "Agent") and SubagentStop
6
9
  # Purpose: Record task outcomes for model escalation decisions
@@ -13,6 +16,17 @@ agent_type=$(echo "$input" | jq -r '.tool_input.subagent_type // .agent_type //
13
16
  model=$(echo "$input" | jq -r '.tool_input.model // .model // "inherit"')
14
17
  description=$(echo "$input" | jq -r '.tool_input.description // .description // ""' | head -c 80)
15
18
 
19
+ # Extract skill name from description or prompt
20
+ skill_name=""
21
+ if echo "$description" | grep -qiE '(skill:|routing|→.*skill)'; then
22
+ skill_name=$(echo "$description" | grep -oiE '[a-z]+-[a-z]+(-[a-z]+)*-?(routing|skill|practices|detection|decomposition|orchestration|pipeline|guards|cycle|plan|review|refactor|publish|version|audit|exec|analyze|bundle|report|setup|watch|lists|status|help|save|recall)' | head -1)
23
+ fi
24
+ # Fallback: check prompt field for "Skill: {name}" pattern
25
+ if [ -z "$skill_name" ]; then
26
+ prompt=$(echo "$input" | jq -r '.tool_input.prompt // ""' | head -c 500)
27
+ skill_name=$(echo "$prompt" | grep -oiE 'Skill:\s*[a-z]+-[a-z]+(-[a-z]+)*' | sed 's/[Ss]kill:\s*//' | head -1)
28
+ fi
29
+
16
30
  # Determine outcome
17
31
  is_error=$(echo "$input" | jq -r '.tool_output.is_error // false')
18
32
 
@@ -61,9 +75,10 @@ entry=$(jq -n \
61
75
  --arg model "$model" \
62
76
  --arg outcome "$outcome" \
63
77
  --arg pattern "$pattern" \
78
+ --arg skill "$skill_name" \
64
79
  --arg desc "$description" \
65
80
  --arg err "$error_summary" \
66
- '{timestamp: $ts, agent_type: $agent, model: $model, outcome: $outcome, pattern_used: $pattern, description: $desc, error_summary: $err}')
81
+ '{timestamp: $ts, agent_type: $agent, model: $model, outcome: $outcome, pattern_used: $pattern, skill: $skill, description: $desc, error_summary: $err}')
67
82
 
68
83
  echo "$entry" >> "$OUTCOME_FILE"
69
84
 
@@ -27,3 +27,15 @@
27
27
  | Repetitive tasks, clear bottleneck, measurable gain | One-time tasks, already fast, complexity > benefit |
28
28
 
29
29
  Readability > Optimization. No optimization without measurement.
30
+
31
+ ## CLAUDE.md Context Optimization (v2.1.72+)
32
+
33
+ HTML comments in CLAUDE.md are hidden from the model during auto-injection but visible via Read tool.
34
+
35
+ | Use Case | Example |
36
+ |----------|---------|
37
+ | Metadata tags | `<!-- agents: 44, skills: 74 -->` |
38
+ | Validation checksums | `<!-- validate-docs: hash=abc123 -->` |
39
+ | Conditional context | `<!-- detailed-architecture: see guides/architecture/ -->` |
40
+
41
+ **Rule**: Move model-unnecessary metadata into HTML comments to reduce context token usage. Keep actionable instructions as visible text.
@@ -11,7 +11,7 @@ Location: `.claude/agents/{name}.md` (single file, kebab-case)
11
11
  ```yaml
12
12
  name: agent-name # Unique identifier (kebab-case)
13
13
  description: Brief desc # One-line summary
14
- model: sonnet # sonnet | opus | haiku
14
+ model: sonnet # sonnet | opus | haiku (or full ID: claude-sonnet-4-6)
15
15
  tools: [Read, Write, ...] # Allowed tools
16
16
  ```
17
17
 
@@ -31,9 +31,10 @@ escalation: # Model escalation policy (optional)
31
31
  path: haiku → sonnet → opus # Escalation sequence
32
32
  threshold: 2 # Failures before advisory
33
33
  soul: true # Enable SOUL.md identity injection
34
- isolation: worktree # Run in isolated git worktree
34
+ isolation: worktree | sandbox # worktree = git worktree, sandbox = restricted bash
35
35
  background: true # Run in background
36
36
  maxTurns: 10 # Max conversation turns
37
+ maxTokens: 100000 # Per-turn token ceiling
37
38
  mcpServers: [server-1] # MCP servers available
38
39
  hooks: # Agent-specific hooks
39
40
  PreToolUse:
@@ -41,9 +42,33 @@ hooks: # Agent-specific hooks
41
42
  command: "echo hook"
42
43
  permissionMode: bypassPermissions # Permission mode
43
44
  disallowedTools: [Bash] # Tools to disallow
45
+ limitations: # Negative capability declarations
46
+ - "cannot execute tests"
47
+ - "cannot modify code"
48
+ domain: backend # backend | frontend | data-engineering | devops | universal
44
49
  ```
45
50
 
46
- > **Note**: `isolation`, `background`, `maxTurns`, `mcpServers`, `hooks`, `permissionMode`, `disallowedTools` are supported in Claude Code v2.1.63+.
51
+ > **Note**: `isolation`, `background`, `maxTurns`, `maxTokens`, `mcpServers`, `hooks`, `permissionMode`, `disallowedTools`, `limitations` are supported in Claude Code v2.1.63+. Hook types `PostCompact`, `Elicitation`, `ElicitationResult` require v2.1.76+.
52
+
53
+ ### Isolation Modes
54
+
55
+ | Mode | Behavior | Use Case |
56
+ |------|----------|----------|
57
+ | `worktree` | Isolated git worktree copy | Code changes that need rollback safety |
58
+ | `sandbox` | Restricted Bash environment | Agents running untrusted or scan commands |
59
+
60
+ When `isolation: sandbox` is set, the agent's Bash calls run with restricted permissions. This is advisory metadata — enforcement depends on the execution environment.
61
+
62
+ ### Token Ceiling
63
+
64
+ When `maxTokens` is set, it serves as advisory metadata for the orchestrator to manage agent turn budgets. The orchestrator should track output and consider escalation or task splitting when an agent approaches its ceiling.
65
+
66
+ ### Negative Capabilities (Limitations)
67
+
68
+ The `limitations` field declares what an agent explicitly CANNOT or SHOULD NOT do. This enables:
69
+ 1. **Clearer routing**: Orchestrator knows agent boundaries
70
+ 2. **Safer delegation**: Prevents accidental capability overreach
71
+ 3. **Better documentation**: Makes agent scope explicit
47
72
 
48
73
  ### Escalation Policy
49
74
 
@@ -144,6 +169,19 @@ user-invocable: false # Whether user can invoke directly
144
169
  disable-model-invocation: true # Prevent model from auto-invoking
145
170
  ```
146
171
 
172
+ ### Skill Effectiveness Tracking
173
+
174
+ Skills can optionally track effectiveness metrics via auto-populated fields:
175
+
176
+ ```yaml
177
+ effectiveness: # Auto-populated by sys-memory-keeper
178
+ invocations: 0 # Total invocation count across sessions
179
+ success_rate: 0.0 # Success rate (0.0-1.0)
180
+ last_invoked: "" # ISO-8601 timestamp
181
+ ```
182
+
183
+ These fields are read-only from the skill's perspective — sys-memory-keeper updates them at session end based on task-outcome-recorder data. They inform model selection, routing optimization, and skill maintenance priorities.
184
+
147
185
  ## Skill Scope
148
186
 
149
187
  | Scope | Purpose | Deployed via init? |
@@ -156,7 +194,7 @@ Default: `core` (when field is omitted)
156
194
 
157
195
  ### Context Fork Criteria
158
196
 
159
- Use `context: fork` for skills that orchestrate multi-agent workflows. Cap at **10 total** across the project.
197
+ Use `context: fork` for skills that orchestrate multi-agent workflows. Cap at **12 total** across the project.
160
198
 
161
199
  | Use `context: fork` | Do NOT use `context: fork` |
162
200
  |---------------------|---------------------------|
@@ -165,10 +203,10 @@ Use `context: fork` for skills that orchestrate multi-agent workflows. Cap at **
165
203
  | Multi-agent coordination patterns | Single-agent reference skills |
166
204
  | Task decomposition/planning | External tool integrations |
167
205
 
168
- Current skills with `context: fork` (8/10 cap):
206
+ Current skills with `context: fork` (11/12 cap):
169
207
  - secretary-routing, dev-lead-routing, de-lead-routing, qa-lead-routing
170
- - dag-orchestration, task-decomposition, worker-reviewer-pipeline
171
- - pipeline-guards
208
+ - dag-orchestration, task-decomposition, worker-reviewer-pipeline, pipeline-guards
209
+ - deep-plan, evaluator-optimizer, sauron-watch
172
210
 
173
211
  ## Naming
174
212
 
@@ -0,0 +1,81 @@
1
+ # [MUST] Completion Verification Rules
2
+
3
+ > **Priority**: MUST | **ID**: R020
4
+
5
+ ## Core Rule
6
+
7
+ Before declaring any task `[Done]`, verify completion against task-type-specific criteria. False completion declarations erode trust and cause downstream failures.
8
+
9
+ ## Task-Type Completion Matrix
10
+
11
+ | Task Type | REQUIRED Verification Before [Done] |
12
+ |-----------|-------------------------------------|
13
+ | Release | All issues closed, version bumped, PR merged, GitHub Release created |
14
+ | Implementation | Code compiles/passes lint, tests pass (if any exist), no TODO markers left |
15
+ | Documentation | Links valid, counts accurate, cross-references updated |
16
+ | Git Operations | Operation succeeded (check exit code), working tree clean |
17
+ | Code Review | All findings addressed or explicitly deferred with justification |
18
+ | Agent/Skill Creation | Frontmatter valid, referenced skills exist, routing updated |
19
+
20
+ ## Self-Check (Before Declaring Done)
21
+
22
+ ```
23
+ ╔══════════════════════════════════════════════════════════════════╗
24
+ ║ BEFORE DECLARING [Done], ASK YOURSELF: ║
25
+ ║ ║
26
+ ║ 1. Did I verify the ACTUAL outcome (not just attempt)? ║
27
+ ║ "I ran the command" ≠ "the command succeeded" ║
28
+ ║ YES → Continue ║
29
+ ║ NO → Verify outcome first ║
30
+ ║ ║
31
+ ║ 2. Does the task type have specific criteria above? ║
32
+ ║ YES → Check each criterion ║
33
+ ║ NO → Apply general verification ║
34
+ ║ ║
35
+ ║ 3. Are there any unchecked items in the task's checklist? ║
36
+ ║ YES → Complete them or explicitly defer with reason ║
37
+ ║ NO → Good. Proceed to [Done] ║
38
+ ║ ║
39
+ ║ 4. Would I bet $100 this task is truly complete? ║
40
+ ║ YES → Declare [Done] ║
41
+ ║ NO → Identify what's uncertain and verify ║
42
+ ╚══════════════════════════════════════════════════════════════════╝
43
+ ```
44
+
45
+ ## Common False Completion Patterns
46
+
47
+ | Pattern | Reality | Fix |
48
+ |---------|---------|-----|
49
+ | "Command executed" | Exit code not checked | Check `$?` or tool output |
50
+ | "File created" | Content not verified | Read file back, verify content |
51
+ | "PR created" | CI not checked | Wait for CI, verify green |
52
+ | "Issue closed" | Related issues not updated | Check parent epic, cross-refs |
53
+ | "Tests pass" | Only ran subset | Run full test suite |
54
+
55
+ ## Completion Contract Format
56
+
57
+ For complex tasks, declare a completion contract upfront:
58
+
59
+ ```
60
+ [Contract] Task: {name}
61
+ ├── Criterion 1: {specific, verifiable condition}
62
+ ├── Criterion 2: {specific, verifiable condition}
63
+ └── Criterion N: {specific, verifiable condition}
64
+ ```
65
+
66
+ Then at completion:
67
+
68
+ ```
69
+ [Done] Task: {name}
70
+ ├── ✓ Criterion 1: {evidence}
71
+ ├── ✓ Criterion 2: {evidence}
72
+ └── ✓ Criterion N: {evidence}
73
+ ```
74
+
75
+ ## Integration
76
+
77
+ | Rule | Interaction |
78
+ |------|-------------|
79
+ | R003 | [Done] status format now requires verification evidence |
80
+ | R010 | Orchestrator verifies subagent completion claims |
81
+ | R017 | Structural changes require sauron verification before [Done] |
@@ -14,6 +14,7 @@ Rule: If native auto memory can handle it, do NOT use claude-mem.
14
14
  Agent frontmatter `memory: project|user|local` enables persistent memory:
15
15
  - System creates memory directory, loads first 200 lines of MEMORY.md into prompt
16
16
  - Read/Write/Edit tools auto-enabled for memory directory
17
+ - Custom directory: set `autoMemoryDirectory` in settings to override default paths (v2.1.74+)
17
18
 
18
19
  | Scope | Location | Git Tracked |
19
20
  |-------|----------|-------------|
@@ -71,6 +72,45 @@ Memory entries in MEMORY.md should include confidence annotations to distinguish
71
72
  [any] → contradicted by evidence → demoted or removed
72
73
  ```
73
74
 
75
+ ### Temporal Decay
76
+
77
+ Memory entries include an optional verification timestamp for decay tracking:
78
+
79
+ **Format**: `[confidence: high, verified: 2026-03-15]`
80
+
81
+ | Age (unverified) | Action |
82
+ |-------------------|--------|
83
+ | 0-30 days | No change — entry is fresh |
84
+ | 31-60 days | Demote one level (high→medium, medium→low) |
85
+ | 61-90 days | Demote again if not re-verified |
86
+ | 91+ days | Removal candidate — flag for review |
87
+
88
+ **Decay Schedule**:
89
+ ```
90
+ Day 0: [confidence: high, verified: 2026-03-15]
91
+ Day 30: [confidence: high, verified: 2026-03-15] ← still within window
92
+ Day 31: [confidence: medium, verified: 2026-03-15] ← auto-demoted
93
+ Day 61: [confidence: low, verified: 2026-03-15] ← demoted again
94
+ Day 91: [REVIEW NEEDED, verified: 2026-03-15] ← flagged
95
+ ```
96
+
97
+ **Re-verification**: Any session that confirms a memory entry resets the verified date and promotes its confidence:
98
+ ```
99
+ Before: [confidence: medium, verified: 2026-01-15]
100
+ Action: Pattern confirmed in session
101
+ After: [confidence: high, verified: 2026-03-15]
102
+ ```
103
+
104
+ **Enforcement**: sys-memory-keeper checks decay at session start and end:
105
+ 1. Session start: scan MEMORY.md for entries past decay threshold
106
+ 2. Flag stale entries with `[STALE]` prefix
107
+ 3. Session end: remove or demote unconfirmed stale entries
108
+
109
+ **Exceptions**: Entries marked `[permanent]` are exempt from decay:
110
+ ```
111
+ ### Architecture Decisions [confidence: high, permanent]
112
+ ```
113
+
74
114
  ## Behavioral Memory
75
115
 
76
116
  MEMORY.md supports an optional `## Behaviors` section for tracking user interaction preferences and workflow patterns.
@@ -140,6 +180,47 @@ When sys-memory-keeper updates MEMORY.md at session end:
140
180
  2. Findings that match existing entries → promote confidence
141
181
  3. Findings that contradict existing entries → flag for review
142
182
 
183
+ ## Agent Metrics
184
+
185
+ MEMORY.md supports an optional `## Metrics` section for tracking per-agent-type performance data.
186
+
187
+ ### Metrics Section Format
188
+
189
+ ```markdown
190
+ ## Metrics [auto-updated by sys-memory-keeper]
191
+
192
+ | Agent Type | Tasks | Success Rate | Avg Model | Last Used |
193
+ |------------|-------|-------------|-----------|-----------|
194
+ | lang-golang-expert | 12 | 92% | sonnet | 2026-03-15 |
195
+ | mgr-gitnerd | 8 | 100% | sonnet | 2026-03-15 |
196
+ ```
197
+
198
+ ### Metrics Collection
199
+
200
+ sys-memory-keeper aggregates metrics at session end:
201
+
202
+ 1. Read `/tmp/.claude-task-outcomes-${PPID}` (JSONL from task-outcome-recorder hook)
203
+ 2. Parse each entry: `{agent_type, outcome, model, timestamp}`
204
+ 3. Aggregate by agent_type: total tasks, success count, model distribution
205
+ 4. Merge with existing Metrics table in MEMORY.md
206
+ 5. Budget: max 20 rows (prune lowest-usage agents when exceeded)
207
+
208
+ ### Metrics Fields
209
+
210
+ | Field | Source | Calculation |
211
+ |-------|--------|-------------|
212
+ | Tasks | task-outcome-recorder JSONL | Count of entries per agent_type |
213
+ | Success Rate | outcome field | `success_count / total_count * 100` |
214
+ | Avg Model | model field | Most frequently used model |
215
+ | Last Used | timestamp field | Most recent invocation |
216
+
217
+ ### Budget Management
218
+
219
+ The Metrics section shares the 200-line MEMORY.md budget:
220
+ 1. Max 20 agent rows in Metrics table
221
+ 2. When adding a new agent would exceed the 20-row limit, prune the agent with the lowest task count
222
+ 3. Merge identical agent types across sessions (cumulative)
223
+
143
224
  ## Session-End Auto-Save
144
225
 
145
226
  ### Trigger