oh-my-customcode 0.37.2 → 0.38.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. package/README.md +20 -5
  2. package/dist/cli/index.js +1189 -99
  3. package/dist/index.js +4 -1
  4. package/package.json +3 -1
  5. package/templates/.claude/hooks/hooks.json +23 -11
  6. package/templates/.claude/hooks/scripts/context-budget-advisor.sh +1 -1
  7. package/templates/.claude/hooks/scripts/eval-core-batch-save.sh +23 -0
  8. package/templates/.claude/hooks/scripts/session-env-check.sh +20 -0
  9. package/templates/.claude/hooks/scripts/stuck-detector.sh +1 -1
  10. package/templates/.claude/hooks/scripts/task-outcome-recorder.sh +13 -1
  11. package/templates/.claude/rules/MAY-optimization.md +12 -0
  12. package/templates/.claude/rules/MUST-agent-design.md +45 -7
  13. package/templates/.claude/rules/MUST-completion-verification.md +81 -0
  14. package/templates/.claude/rules/SHOULD-memory-integration.md +81 -0
  15. package/templates/.claude/skills/de-lead-routing/SKILL.md +8 -92
  16. package/templates/.claude/skills/deep-plan/SKILL.md +55 -4
  17. package/templates/.claude/skills/dev-lead-routing/SKILL.md +9 -21
  18. package/templates/.claude/skills/dev-refactor/SKILL.md +34 -1
  19. package/templates/.claude/skills/evaluator-optimizer/SKILL.md +53 -0
  20. package/templates/.claude/skills/qa-lead-routing/SKILL.md +7 -242
  21. package/templates/.claude/skills/research/SKILL.md +74 -7
  22. package/templates/.claude/skills/sauron-watch/SKILL.md +81 -0
  23. package/templates/.claude/skills/secretary-routing/SKILL.md +3 -18
  24. package/templates/.claude/skills/structured-dev-cycle/SKILL.md +20 -3
  25. package/templates/guides/claude-code/index.yaml +5 -0
  26. package/templates/manifest.json +3 -3
  27. package/templates/.claude/hooks/scripts/session-compliance-report.sh +0 -65
package/dist/index.js CHANGED
@@ -544,7 +544,9 @@ function getDefaultConfig() {
544
544
  ".claude/agent-memory/",
545
545
  ".claude/agent-memory-local/"
546
546
  ],
547
- customComponents: []
547
+ customComponents: [],
548
+ domain: undefined,
549
+ teamMode: false
548
550
  };
549
551
  }
550
552
  function getDefaultPreferences() {
@@ -1387,6 +1389,7 @@ async function installEntryDocWithTracking(targetDir, options, result) {
1387
1389
  async function updateInstallConfig(targetDir, options, installedComponents) {
1388
1390
  const config = await loadConfig(targetDir);
1389
1391
  config.language = options.language ?? DEFAULT_LANGUAGE;
1392
+ config.domain = options.domain;
1390
1393
  config.installedAt = new Date().toISOString();
1391
1394
  config.installedComponents = installedComponents;
1392
1395
  await saveConfig(targetDir, config);
package/package.json CHANGED
@@ -1,6 +1,7 @@
1
1
  {
2
2
  "name": "oh-my-customcode",
3
- "version": "0.37.2",
3
+ "workspaces": ["packages/*"],
4
+ "version": "0.38.0",
4
5
  "description": "Batteries-included agent harness for Claude Code",
5
6
  "type": "module",
6
7
  "bin": {
@@ -41,6 +42,7 @@
41
42
  "prepublishOnly": "bun run build && bun run test"
42
43
  },
43
44
  "dependencies": {
45
+ "@clack/prompts": "^1.1.0",
44
46
  "commander": "^14.0.2",
45
47
  "i18next": "^25.8.0",
46
48
  "yaml": "^2.8.2"
@@ -141,6 +141,18 @@
141
141
  "description": "Record agent outcomes on subagent completion (complements PostToolUse Agent matcher)"
142
142
  }
143
143
  ],
144
+ "PostCompact": [
145
+ {
146
+ "matcher": "*",
147
+ "hooks": [
148
+ {
149
+ "type": "prompt",
150
+ "prompt": "Context was just compacted. MANDATORY post-compaction steps: 1) All enforced rules (R007 Agent ID, R008 Tool ID, R009 Parallel, R010 Orchestrator, R018 Agent Teams) remain ACTIVE — compaction does NOT disable rules. 2) Your next response MUST include agent identification per R007. 3) Continue delegating ALL file modifications to subagents per R010. 4) Re-read CLAUDE.md if needed to refresh project-specific context."
151
+ }
152
+ ],
153
+ "description": "Reinforce enforced rules after context compaction — prevents rule amnesia (v2.1.76+)"
154
+ }
155
+ ],
144
156
  "PostToolUse": [
145
157
  {
146
158
  "matcher": "tool == \"Bash\"",
@@ -212,16 +224,6 @@
212
224
  ],
213
225
  "description": "Type check Python files with ty after edits"
214
226
  },
215
- {
216
- "matcher": "tool == \"Task\" || tool == \"Agent\"",
217
- "hooks": [
218
- {
219
- "type": "command",
220
- "command": "bash .claude/hooks/scripts/task-outcome-recorder.sh"
221
- }
222
- ],
223
- "description": "Record agent/task outcomes (success/failure) for model escalation decisions"
224
- },
225
227
  {
226
228
  "matcher": "tool == \"Edit\" || tool == \"Write\" || tool == \"Bash\" || tool == \"Task\" || tool == \"Agent\" || tool == \"Read\" || tool == \"Glob\" || tool == \"Grep\"",
227
229
  "hooks": [
@@ -294,6 +296,16 @@
294
296
  ],
295
297
  "description": "Final console.log audit and session diagnostics before session ends"
296
298
  },
299
+ {
300
+ "matcher": "*",
301
+ "hooks": [
302
+ {
303
+ "type": "command",
304
+ "command": "bash .claude/hooks/scripts/eval-core-batch-save.sh"
305
+ }
306
+ ],
307
+ "description": "Batch-save agent outcomes to eval-core DB on session end (advisory, exit 0)"
308
+ },
297
309
  {
298
310
  "matcher": "*",
299
311
  "hooks": [
@@ -302,7 +314,7 @@
302
314
  "prompt": "Session-end memory checkpoint (R011 enforcement). Check conversation history for these 2 steps: 1) sys-memory-keeper was delegated to update MEMORY.md 2) claude-mem save was attempted via ToolSearch + mcp__plugin_claude-mem_mcp-search__save_memory. Note: episodic-memory auto-indexes after session — no manual verification needed. Decision rules: If BOTH were attempted (success or failure both count): approve. If MCP tools are unavailable after ToolSearch attempt: approve with note. If session had no explicit session-end signal from user (quick question, no memory work): approve. If any step was NOT attempted despite user signaling session end: block with systemMessage listing the missing steps."
303
315
  }
304
316
  ],
305
- "description": "Enforce R011 session-end memory saves — block stop if claude-mem or episodic-memory saves were skipped"
317
+ "description": "Enforce R011 session-end memory saves — block stop if claude-mem save was skipped (episodic-memory auto-indexes)"
306
318
  }
307
319
  ]
308
320
  }
@@ -29,7 +29,7 @@ read_count=${read_count:-0}
29
29
  agent_count=${agent_count:-0}
30
30
 
31
31
  # Determine tool type from input
32
- TOOL=$(echo "$input" | jq -r '.tool // ""' 2>/dev/null || echo "")
32
+ TOOL=$(echo "$input" | jq -r '.tool_name // ""' 2>/dev/null || echo "")
33
33
  tool_count=$((tool_count + 1))
34
34
 
35
35
  case "$TOOL" in
@@ -0,0 +1,23 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+
4
+ # Eval-Core Batch Save on Session End (Advisory Only)
5
+ # Trigger: Stop hook
6
+ # Purpose: Auto-collect eval metrics on session end via eval-core CLI
7
+ # Protocol: stdin JSON -> process -> stdout pass-through, exit 0 always
8
+ #
9
+ # This hook is advisory-only and never blocks session termination.
10
+ # If eval-core is unavailable or collection fails, the session continues normally.
11
+
12
+ input=$(cat)
13
+ PPID_FILE="/tmp/.claude-task-outcomes-${PPID}"
14
+
15
+ # Only attempt collection if outcome file exists and eval-core is available
16
+ if [ -f "$PPID_FILE" ] && command -v eval-core >/dev/null 2>&1; then
17
+ echo "[Hook] Collecting eval metrics via eval-core..." >&2
18
+ eval-core collect --ppid "$PPID" 2>/dev/null || true
19
+ fi
20
+
21
+ # Always pass through input and exit 0 (advisory only)
22
+ echo "$input"
23
+ exit 0
@@ -82,6 +82,20 @@ if command -v git >/dev/null 2>&1 && git rev-parse --is-inside-work-tree >/dev/n
82
82
  fi
83
83
  fi
84
84
 
85
+ # --- CI Status Check ---
86
+ # Check last CI run status if gh CLI is available
87
+ if command -v gh &>/dev/null; then
88
+ ci_status=$(gh run list --limit 1 --json conclusion -q '.[0].conclusion' 2>/dev/null || echo "unknown")
89
+ ci_name=$(gh run list --limit 1 --json name -q '.[0].name' 2>/dev/null || echo "unknown")
90
+ if [ "$ci_status" = "failure" ]; then
91
+ echo "[Session] ⚠ WARNING: Last CI run FAILED (${ci_name}) — check before pushing" >&2
92
+ elif [ "$ci_status" = "success" ]; then
93
+ echo "[Session] CI: last run passed (${ci_name})" >&2
94
+ elif [ "$ci_status" != "unknown" ]; then
95
+ echo "[Session] CI: last run status: ${ci_status} (${ci_name})" >&2
96
+ fi
97
+ fi
98
+
85
99
  # Update availability check (local cache only — no network calls)
86
100
  OMCUSTOM_UPDATE_STATUS="unknown"
87
101
  INSTALLED_VERSION=""
@@ -173,6 +187,12 @@ case "$DRIFT_STATUS" in
173
187
  esac
174
188
  echo "------------------------------------" >&2
175
189
 
190
+ # SessionEnd hooks timeout (v2.1.74+)
191
+ if [ -z "${CLAUDE_CODE_SESSIONEND_HOOKS_TIMEOUT_MS:-}" ]; then
192
+ echo "[SessionEnv] ⚠ CLAUDE_CODE_SESSIONEND_HOOKS_TIMEOUT_MS not set (default: 1500ms)" >&2
193
+ echo "[SessionEnv] Recommend: export CLAUDE_CODE_SESSIONEND_HOOKS_TIMEOUT_MS=10000" >&2
194
+ fi
195
+
176
196
  # Update Check report
177
197
  echo "" >&2
178
198
  echo " [Update Check]" >&2
@@ -9,7 +9,7 @@ set -euo pipefail
9
9
  # - exit 1: hard block (extreme stuck loops, >= HARD_BLOCK_THRESHOLD repetitions)
10
10
 
11
11
  # Hard block threshold: consecutive identical operations before blocking
12
- HARD_BLOCK_THRESHOLD=5
12
+ HARD_BLOCK_THRESHOLD=${CLAUDE_STUCK_THRESHOLD:-3}
13
13
 
14
14
  input=$(cat)
15
15
 
@@ -13,6 +13,17 @@ agent_type=$(echo "$input" | jq -r '.tool_input.subagent_type // .agent_type //
13
13
  model=$(echo "$input" | jq -r '.tool_input.model // .model // "inherit"')
14
14
  description=$(echo "$input" | jq -r '.tool_input.description // .description // ""' | head -c 80)
15
15
 
16
+ # Extract skill name from description or prompt
17
+ skill_name=""
18
+ if echo "$description" | grep -qiE '(skill:|routing|→.*skill)'; then
19
+ skill_name=$(echo "$description" | grep -oiE '[a-z]+-[a-z]+(-[a-z]+)*-?(routing|skill|practices|detection|decomposition|orchestration|pipeline|guards|cycle|plan|review|refactor|publish|version|audit|exec|analyze|bundle|report|setup|watch|lists|status|help|save|recall)' | head -1)
20
+ fi
21
+ # Fallback: check prompt field for "Skill: {name}" pattern
22
+ if [ -z "$skill_name" ]; then
23
+ prompt=$(echo "$input" | jq -r '.tool_input.prompt // ""' | head -c 500)
24
+ skill_name=$(echo "$prompt" | grep -oiE 'Skill:\s*[a-z]+-[a-z]+(-[a-z]+)*' | sed 's/[Ss]kill:\s*//' | head -1)
25
+ fi
26
+
16
27
  # Determine outcome
17
28
  is_error=$(echo "$input" | jq -r '.tool_output.is_error // false')
18
29
 
@@ -61,9 +72,10 @@ entry=$(jq -n \
61
72
  --arg model "$model" \
62
73
  --arg outcome "$outcome" \
63
74
  --arg pattern "$pattern" \
75
+ --arg skill "$skill_name" \
64
76
  --arg desc "$description" \
65
77
  --arg err "$error_summary" \
66
- '{timestamp: $ts, agent_type: $agent, model: $model, outcome: $outcome, pattern_used: $pattern, description: $desc, error_summary: $err}')
78
+ '{timestamp: $ts, agent_type: $agent, model: $model, outcome: $outcome, pattern_used: $pattern, skill: $skill, description: $desc, error_summary: $err}')
67
79
 
68
80
  echo "$entry" >> "$OUTCOME_FILE"
69
81
 
@@ -27,3 +27,15 @@
27
27
  | Repetitive tasks, clear bottleneck, measurable gain | One-time tasks, already fast, complexity > benefit |
28
28
 
29
29
  Readability > Optimization. No optimization without measurement.
30
+
31
+ ## CLAUDE.md Context Optimization (v2.1.72+)
32
+
33
+ HTML comments in CLAUDE.md are hidden from the model during auto-injection but visible via Read tool.
34
+
35
+ | Use Case | Example |
36
+ |----------|---------|
37
+ | Metadata tags | `<!-- agents: 44, skills: 74 -->` |
38
+ | Validation checksums | `<!-- validate-docs: hash=abc123 -->` |
39
+ | Conditional context | `<!-- detailed-architecture: see guides/architecture/ -->` |
40
+
41
+ **Rule**: Move model-unnecessary metadata into HTML comments to reduce context token usage. Keep actionable instructions as visible text.
@@ -11,7 +11,7 @@ Location: `.claude/agents/{name}.md` (single file, kebab-case)
11
11
  ```yaml
12
12
  name: agent-name # Unique identifier (kebab-case)
13
13
  description: Brief desc # One-line summary
14
- model: sonnet # sonnet | opus | haiku
14
+ model: sonnet # sonnet | opus | haiku (or full ID: claude-sonnet-4-6)
15
15
  tools: [Read, Write, ...] # Allowed tools
16
16
  ```
17
17
 
@@ -31,9 +31,10 @@ escalation: # Model escalation policy (optional)
31
31
  path: haiku → sonnet → opus # Escalation sequence
32
32
  threshold: 2 # Failures before advisory
33
33
  soul: true # Enable SOUL.md identity injection
34
- isolation: worktree # Run in isolated git worktree
34
+ isolation: worktree | sandbox # worktree = git worktree, sandbox = restricted bash
35
35
  background: true # Run in background
36
36
  maxTurns: 10 # Max conversation turns
37
+ maxTokens: 100000 # Per-turn token ceiling
37
38
  mcpServers: [server-1] # MCP servers available
38
39
  hooks: # Agent-specific hooks
39
40
  PreToolUse:
@@ -41,9 +42,33 @@ hooks: # Agent-specific hooks
41
42
  command: "echo hook"
42
43
  permissionMode: bypassPermissions # Permission mode
43
44
  disallowedTools: [Bash] # Tools to disallow
45
+ limitations: # Negative capability declarations
46
+ - "cannot execute tests"
47
+ - "cannot modify code"
48
+ domain: backend # backend | frontend | data-engineering | devops | universal
44
49
  ```
45
50
 
46
- > **Note**: `isolation`, `background`, `maxTurns`, `mcpServers`, `hooks`, `permissionMode`, `disallowedTools` are supported in Claude Code v2.1.63+.
51
+ > **Note**: `isolation`, `background`, `maxTurns`, `maxTokens`, `mcpServers`, `hooks`, `permissionMode`, `disallowedTools`, `limitations` are supported in Claude Code v2.1.63+. Hook types `PostCompact`, `Elicitation`, `ElicitationResult` require v2.1.76+.
52
+
53
+ ### Isolation Modes
54
+
55
+ | Mode | Behavior | Use Case |
56
+ |------|----------|----------|
57
+ | `worktree` | Isolated git worktree copy | Code changes that need rollback safety |
58
+ | `sandbox` | Restricted Bash environment | Agents running untrusted or scan commands |
59
+
60
+ When `isolation: sandbox` is set, the agent's Bash calls run with restricted permissions. This is advisory metadata — enforcement depends on the execution environment.
61
+
62
+ ### Token Ceiling
63
+
64
+ When `maxTokens` is set, it serves as advisory metadata for the orchestrator to manage agent turn budgets. The orchestrator should track output and consider escalation or task splitting when an agent approaches its ceiling.
65
+
66
+ ### Negative Capabilities (Limitations)
67
+
68
+ The `limitations` field declares what an agent explicitly CANNOT or SHOULD NOT do. This enables:
69
+ 1. **Clearer routing**: Orchestrator knows agent boundaries
70
+ 2. **Safer delegation**: Prevents accidental capability overreach
71
+ 3. **Better documentation**: Makes agent scope explicit
47
72
 
48
73
  ### Escalation Policy
49
74
 
@@ -144,6 +169,19 @@ user-invocable: false # Whether user can invoke directly
144
169
  disable-model-invocation: true # Prevent model from auto-invoking
145
170
  ```
146
171
 
172
+ ### Skill Effectiveness Tracking
173
+
174
+ Skills can optionally track effectiveness metrics via auto-populated fields:
175
+
176
+ ```yaml
177
+ effectiveness: # Auto-populated by sys-memory-keeper
178
+ invocations: 0 # Total invocation count across sessions
179
+ success_rate: 0.0 # Success rate (0.0-1.0)
180
+ last_invoked: "" # ISO-8601 timestamp
181
+ ```
182
+
183
+ These fields are read-only from the skill's perspective — sys-memory-keeper updates them at session end based on task-outcome-recorder data. They inform model selection, routing optimization, and skill maintenance priorities.
184
+
147
185
  ## Skill Scope
148
186
 
149
187
  | Scope | Purpose | Deployed via init? |
@@ -156,7 +194,7 @@ Default: `core` (when field is omitted)
156
194
 
157
195
  ### Context Fork Criteria
158
196
 
159
- Use `context: fork` for skills that orchestrate multi-agent workflows. Cap at **10 total** across the project.
197
+ Use `context: fork` for skills that orchestrate multi-agent workflows. Cap at **12 total** across the project.
160
198
 
161
199
  | Use `context: fork` | Do NOT use `context: fork` |
162
200
  |---------------------|---------------------------|
@@ -165,10 +203,10 @@ Use `context: fork` for skills that orchestrate multi-agent workflows. Cap at **
165
203
  | Multi-agent coordination patterns | Single-agent reference skills |
166
204
  | Task decomposition/planning | External tool integrations |
167
205
 
168
- Current skills with `context: fork` (8/10 cap):
206
+ Current skills with `context: fork` (11/12 cap):
169
207
  - secretary-routing, dev-lead-routing, de-lead-routing, qa-lead-routing
170
- - dag-orchestration, task-decomposition, worker-reviewer-pipeline
171
- - pipeline-guards
208
+ - dag-orchestration, task-decomposition, worker-reviewer-pipeline, pipeline-guards
209
+ - deep-plan, evaluator-optimizer, sauron-watch
172
210
 
173
211
  ## Naming
174
212
 
@@ -0,0 +1,81 @@
1
+ # [MUST] Completion Verification Rules
2
+
3
+ > **Priority**: MUST | **ID**: R020
4
+
5
+ ## Core Rule
6
+
7
+ Before declaring any task `[Done]`, verify completion against task-type-specific criteria. False completion declarations erode trust and cause downstream failures.
8
+
9
+ ## Task-Type Completion Matrix
10
+
11
+ | Task Type | REQUIRED Verification Before [Done] |
12
+ |-----------|-------------------------------------|
13
+ | Release | All issues closed, version bumped, PR merged, GitHub Release created |
14
+ | Implementation | Code compiles/passes lint, tests pass (if exist), no TODO markers left |
15
+ | Documentation | Links valid, counts accurate, cross-references updated |
16
+ | Git Operations | Operation succeeded (check exit code), working tree clean |
17
+ | Code Review | All findings addressed or explicitly deferred with justification |
18
+ | Agent/Skill Creation | Frontmatter valid, referenced skills exist, routing updated |
19
+
20
+ ## Self-Check (Before Declaring Done)
21
+
22
+ ```
23
+ ╔══════════════════════════════════════════════════════════════════╗
24
+ ║ BEFORE DECLARING [Done], ASK YOURSELF: ║
25
+ ║ ║
26
+ ║ 1. Did I verify the ACTUAL outcome (not just attempt)? ║
27
+ ║ "I ran the command" ≠ "the command succeeded" ║
28
+ ║ YES → Continue ║
29
+ ║ NO → Verify outcome first ║
30
+ ║ ║
31
+ ║ 2. Does the task type have specific criteria above? ║
32
+ ║ YES → Check each criterion ║
33
+ ║ NO → Apply general verification ║
34
+ ║ ║
35
+ ║ 3. Are there any unchecked items in the task's checklist? ║
36
+ ║ YES → Complete them or explicitly defer with reason ║
37
+ ║ NO → Good. Proceed to [Done] ║
38
+ ║ ║
39
+ ║ 4. Would I bet $100 this task is truly complete? ║
40
+ ║ YES → Declare [Done] ║
41
+ ║ NO → Identify what's uncertain and verify ║
42
+ ╚══════════════════════════════════════════════════════════════════╝
43
+ ```
44
+
45
+ ## Common False Completion Patterns
46
+
47
+ | Pattern | Reality | Fix |
48
+ |---------|---------|-----|
49
+ | "Command executed" | Exit code not checked | Check `$?` or tool output |
50
+ | "File created" | Content not verified | Read file back, verify content |
51
+ | "PR created" | CI not checked | Wait for CI, verify green |
52
+ | "Issue closed" | Related issues not updated | Check parent epic, cross-refs |
53
+ | "Tests pass" | Only ran subset | Run full test suite |
54
+
55
+ ## Completion Contract Format
56
+
57
+ For complex tasks, declare completion contract upfront:
58
+
59
+ ```
60
+ [Contract] Task: {name}
61
+ ├── Criterion 1: {specific, verifiable condition}
62
+ ├── Criterion 2: {specific, verifiable condition}
63
+ └── Criterion N: {specific, verifiable condition}
64
+ ```
65
+
66
+ Then at completion:
67
+
68
+ ```
69
+ [Done] Task: {name}
70
+ ├── ✓ Criterion 1: {evidence}
71
+ ├── ✓ Criterion 2: {evidence}
72
+ └── ✓ Criterion N: {evidence}
73
+ ```
74
+
75
+ ## Integration
76
+
77
+ | Rule | Interaction |
78
+ |------|-------------|
79
+ | R003 | [Done] status format now requires verification evidence |
80
+ | R010 | Orchestrator verifies subagent completion claims |
81
+ | R017 | Structural changes require sauron verification before [Done] |
@@ -14,6 +14,7 @@ Rule: If native auto memory can handle it, do NOT use claude-mem.
14
14
  Agent frontmatter `memory: project|user|local` enables persistent memory:
15
15
  - System creates memory directory, loads first 200 lines of MEMORY.md into prompt
16
16
  - Read/Write/Edit tools auto-enabled for memory directory
17
+ - Custom directory: set `autoMemoryDirectory` in settings to override default paths (v2.1.74+)
17
18
 
18
19
  | Scope | Location | Git Tracked |
19
20
  |-------|----------|-------------|
@@ -71,6 +72,45 @@ Memory entries in MEMORY.md should include confidence annotations to distinguish
71
72
  [any] → contradicted by evidence → demoted or removed
72
73
  ```
73
74
 
75
+ ### Temporal Decay
76
+
77
+ Memory entries include an optional verification timestamp for decay tracking:
78
+
79
+ **Format**: `[confidence: high, verified: 2026-03-15]`
80
+
81
+ | Age (unverified) | Action |
82
+ |-------------------|--------|
83
+ | 0-30 days | No change — entry is fresh |
84
+ | 30-60 days | Demote one level (high→medium, medium→low) |
85
+ | 60-90 days | Demote again if not re-verified |
86
+ | 90+ days | Removal candidate — flag for review |
87
+
88
+ **Decay Schedule**:
89
+ ```
90
+ Day 0: [confidence: high, verified: 2026-03-15]
91
+ Day 30: [confidence: high, verified: 2026-03-15] ← still within window
92
+ Day 31: [confidence: medium, verified: 2026-03-15] ← auto-demoted
93
+ Day 61: [confidence: low, verified: 2026-03-15] ← demoted again
94
+ Day 91: [REVIEW NEEDED, verified: 2026-03-15] ← flagged
95
+ ```
96
+
97
+ **Re-verification**: Any session that confirms a memory entry resets the verified date:
98
+ ```
99
+ Before: [confidence: medium, verified: 2026-01-15]
100
+ Action: Pattern confirmed in session
101
+ After: [confidence: high, verified: 2026-03-15]
102
+ ```
103
+
104
+ **Enforcement**: sys-memory-keeper checks decay at session start and end:
105
+ 1. Session start: scan MEMORY.md for entries past decay threshold
106
+ 2. Flag stale entries with `[STALE]` prefix
107
+ 3. Session end: remove or demote unconfirmed stale entries
108
+
109
+ **Exceptions**: Entries marked `[permanent]` are exempt from decay:
110
+ ```
111
+ ### Architecture Decisions [confidence: high, permanent]
112
+ ```
113
+
74
114
  ## Behavioral Memory
75
115
 
76
116
  MEMORY.md supports an optional `## Behaviors` section for tracking user interaction preferences and workflow patterns.
@@ -140,6 +180,47 @@ When sys-memory-keeper updates MEMORY.md at session end:
140
180
  2. Findings that match existing entries → promote confidence
141
181
  3. Findings that contradict existing entries → flag for review
142
182
 
183
+ ## Agent Metrics
184
+
185
+ MEMORY.md supports an optional `## Metrics` section for tracking per-agent-type performance data.
186
+
187
+ ### Metrics Section Format
188
+
189
+ ```markdown
190
+ ## Metrics [auto-updated by sys-memory-keeper]
191
+
192
+ | Agent Type | Tasks | Success Rate | Avg Model | Last Used |
193
+ |------------|-------|-------------|-----------|-----------|
194
+ | lang-golang-expert | 12 | 92% | sonnet | 2026-03-15 |
195
+ | mgr-gitnerd | 8 | 100% | sonnet | 2026-03-15 |
196
+ ```
197
+
198
+ ### Metrics Collection
199
+
200
+ sys-memory-keeper aggregates metrics at session end:
201
+
202
+ 1. Read `/tmp/.claude-task-outcomes-${PPID}` (JSONL from task-outcome-recorder hook)
203
+ 2. Parse each entry: `{agent_type, outcome, model, timestamp}`
204
+ 3. Aggregate by agent_type: total tasks, success count, model distribution
205
+ 4. Merge with existing Metrics table in MEMORY.md
206
+ 5. Budget: max 20 rows (prune lowest-usage agents when exceeded)
207
+
208
+ ### Metrics Fields
209
+
210
+ | Field | Source | Calculation |
211
+ |-------|--------|-------------|
212
+ | Tasks | task-outcome-recorder JSONL | Count of entries per agent_type |
213
+ | Success Rate | outcome field | `success_count / total_count * 100` |
214
+ | Avg Model | model field | Most frequently used model |
215
+ | Last Used | timestamp field | Most recent invocation |
216
+
217
+ ### Budget Management
218
+
219
+ The Metrics section shares the 200-line MEMORY.md budget:
220
+ 1. Max 20 agent rows in Metrics table
221
+ 2. When adding new agent, prune agent with lowest task count
222
+ 3. Merge identical agent types across sessions (cumulative)
223
+
143
224
  ## Session-End Auto-Save
144
225
 
145
226
  ### Trigger
@@ -65,8 +65,11 @@ Check if Agent Teams is available (`CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1` or T
65
65
  For **new pipeline code**, **DAG scaffolding**, or **SQL model generation**:
66
66
 
67
67
  1. Check `/tmp/.claude-env-status-*` for codex availability
68
- 2. If codex available → suggest hybrid workflow for code generation
69
- 3. If codex unavailable → use DE expert directly
68
+ 2. If codex available AND task involves new file creation → automatically delegate to `/codex-exec` for scaffolding:
69
+ - Display: `[Codex Hybrid] Delegating to codex-exec...`
70
+ - codex-exec generates initial code (strength: fast generation)
71
+ - Selected DE expert reviews and refines codex output (strength: reasoning, quality)
72
+ 3. If codex unavailable → display `[Codex] Unavailable — proceeding with {expert} directly` and use DE expert directly
70
73
 
71
74
  **Suitable**: New DAG files, dbt model scaffolding, SQL template generation
72
75
  **Unsuitable**: Existing pipeline modification, architecture decisions, data quality analysis
@@ -76,26 +79,11 @@ Route to appropriate DE expert based on tool/framework detection.
76
79
 
77
80
  ### Step 4: Ontology-RAG Enrichment (R019)
78
81
 
79
- After agent selection, enrich the spawned agent's prompt with ontology context:
82
+ If `get_agent_for_task` MCP tool is available, call it with the original query and inject `suggested_skills` into the agent prompt. Skip silently on failure.
80
83
 
81
- 1. Call `get_agent_for_task(original_query)` via MCP
82
- 2. Extract `suggested_skills` from response
83
- 3. If `suggested_skills` non-empty, prepend to spawned agent prompt:
84
- `"Ontology context suggests these skills may be relevant: {suggested_skills}"`
85
- 4. On MCP failure: skip silently, proceed with unmodified prompt
84
+ ### Step 5: Soul Injection (R006)
86
85
 
87
- **This step is advisory only — it never changes which agent is selected.**
88
-
89
- ### Step 5: Soul Injection
90
-
91
- If the selected agent has `soul: true` in its frontmatter:
92
-
93
- 1. Read `.claude/agents/souls/{agent-name}.soul.md`
94
- 2. If file exists, prepend soul content to the agent's prompt:
95
- `"Identity context:\n{soul content}\n\n---\n\n"`
96
- 3. If file doesn't exist → skip silently (no error, no injection)
97
-
98
- **This step runs after ontology-RAG enrichment. Soul content is identity context, not capability instructions.**
86
+ If the selected agent has `soul: true` in frontmatter, read and prepend `.claude/agents/souls/{agent-name}.soul.md` content to the prompt. Skip silently if file doesn't exist.
99
87
 
100
88
  ## Command Routing
101
89
 
@@ -199,78 +187,6 @@ For projects spanning multiple DE tools:
199
187
  | de-kafka-expert | `sonnet` | `opus` for topology design |
200
188
  | de-snowflake-expert | `sonnet` | `opus` for warehouse design |
201
189
 
202
- ### Agent Call Examples
203
-
204
- ```
205
- # Complex pipeline architecture
206
- Agent(
207
- subagent_type: "general-purpose",
208
- prompt: "Design end-to-end pipeline architecture following de-pipeline-expert guidelines",
209
- model: "opus"
210
- )
211
-
212
- # Standard DAG review
213
- Agent(
214
- subagent_type: "general-purpose",
215
- prompt: "Review Airflow DAGs in dags/ following de-airflow-expert guidelines",
216
- model: "sonnet"
217
- )
218
-
219
- # Quick dbt test validation
220
- Agent(
221
- subagent_type: "Explore",
222
- prompt: "Find all dbt models missing schema tests",
223
- model: "haiku"
224
- )
225
- ```
226
-
227
- ## Parallel Execution
228
-
229
- Following R009:
230
- - Maximum 4 parallel instances
231
- - Independent tool/module operations
232
- - Coordinate cross-tool consistency
233
-
234
- Example:
235
- ```
236
- User: "Review all DE configs"
237
-
238
- Detection:
239
- - dags/ → de-airflow-expert
240
- - models/ → de-dbt-expert
241
- - kafka/ → de-kafka-expert
242
-
243
- Route (parallel):
244
- Agent(de-airflow-expert role → review dags/, model: "sonnet")
245
- Agent(de-dbt-expert role → review models/, model: "sonnet")
246
- Agent(de-kafka-expert role → review kafka/, model: "sonnet")
247
- ```
248
-
249
- ## Display Format
250
-
251
- ```
252
- [Analyzing] Detected: Airflow, dbt, Snowflake
253
-
254
- [Delegating] de-airflow-expert:sonnet → DAG design
255
- [Delegating] de-dbt-expert:sonnet → Model structure
256
- [Delegating] de-snowflake-expert:sonnet → Warehouse config
257
-
258
- [Progress] ███████████░ 2/3 experts completed
259
-
260
- [Summary]
261
- Airflow: DAG with 5 tasks designed
262
- dbt: 12 models across 3 layers
263
- Snowflake: Warehouse + schema configured
264
-
265
- Pipeline design completed.
266
- ```
267
-
268
- ## Integration with Other Routing Skills
269
-
270
- - **dev-lead-routing**: Hands off to DE lead when data engineering keywords detected
271
- - **secretary-routing**: DE agents accessible through secretary for management tasks
272
- - **qa-lead-routing**: Coordinates with QA for data quality testing
273
-
274
190
  ## No Match Fallback
275
191
 
276
192
  When a data engineering tool is detected but no matching agent exists: