oh-my-customcode 0.37.2 → 0.38.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -5
- package/dist/cli/index.js +1189 -99
- package/dist/index.js +4 -1
- package/package.json +3 -1
- package/templates/.claude/hooks/hooks.json +23 -11
- package/templates/.claude/hooks/scripts/context-budget-advisor.sh +1 -1
- package/templates/.claude/hooks/scripts/eval-core-batch-save.sh +23 -0
- package/templates/.claude/hooks/scripts/session-env-check.sh +20 -0
- package/templates/.claude/hooks/scripts/stuck-detector.sh +1 -1
- package/templates/.claude/hooks/scripts/task-outcome-recorder.sh +13 -1
- package/templates/.claude/rules/MAY-optimization.md +12 -0
- package/templates/.claude/rules/MUST-agent-design.md +45 -7
- package/templates/.claude/rules/MUST-completion-verification.md +81 -0
- package/templates/.claude/rules/SHOULD-memory-integration.md +81 -0
- package/templates/.claude/skills/de-lead-routing/SKILL.md +8 -92
- package/templates/.claude/skills/deep-plan/SKILL.md +55 -4
- package/templates/.claude/skills/dev-lead-routing/SKILL.md +9 -21
- package/templates/.claude/skills/dev-refactor/SKILL.md +34 -1
- package/templates/.claude/skills/evaluator-optimizer/SKILL.md +53 -0
- package/templates/.claude/skills/qa-lead-routing/SKILL.md +7 -242
- package/templates/.claude/skills/research/SKILL.md +74 -7
- package/templates/.claude/skills/sauron-watch/SKILL.md +81 -0
- package/templates/.claude/skills/secretary-routing/SKILL.md +3 -18
- package/templates/.claude/skills/structured-dev-cycle/SKILL.md +20 -3
- package/templates/guides/claude-code/index.yaml +5 -0
- package/templates/manifest.json +3 -3
- package/templates/.claude/hooks/scripts/session-compliance-report.sh +0 -65
package/dist/index.js
CHANGED
|
@@ -544,7 +544,9 @@ function getDefaultConfig() {
|
|
|
544
544
|
".claude/agent-memory/",
|
|
545
545
|
".claude/agent-memory-local/"
|
|
546
546
|
],
|
|
547
|
-
customComponents: []
|
|
547
|
+
customComponents: [],
|
|
548
|
+
domain: undefined,
|
|
549
|
+
teamMode: false
|
|
548
550
|
};
|
|
549
551
|
}
|
|
550
552
|
function getDefaultPreferences() {
|
|
@@ -1387,6 +1389,7 @@ async function installEntryDocWithTracking(targetDir, options, result) {
|
|
|
1387
1389
|
async function updateInstallConfig(targetDir, options, installedComponents) {
|
|
1388
1390
|
const config = await loadConfig(targetDir);
|
|
1389
1391
|
config.language = options.language ?? DEFAULT_LANGUAGE;
|
|
1392
|
+
config.domain = options.domain;
|
|
1390
1393
|
config.installedAt = new Date().toISOString();
|
|
1391
1394
|
config.installedComponents = installedComponents;
|
|
1392
1395
|
await saveConfig(targetDir, config);
|
package/package.json
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "oh-my-customcode",
|
|
3
|
-
"
|
|
3
|
+
"workspaces": ["packages/*"],
|
|
4
|
+
"version": "0.38.0",
|
|
4
5
|
"description": "Batteries-included agent harness for Claude Code",
|
|
5
6
|
"type": "module",
|
|
6
7
|
"bin": {
|
|
@@ -41,6 +42,7 @@
|
|
|
41
42
|
"prepublishOnly": "bun run build && bun run test"
|
|
42
43
|
},
|
|
43
44
|
"dependencies": {
|
|
45
|
+
"@clack/prompts": "^1.1.0",
|
|
44
46
|
"commander": "^14.0.2",
|
|
45
47
|
"i18next": "^25.8.0",
|
|
46
48
|
"yaml": "^2.8.2"
|
|
@@ -141,6 +141,18 @@
|
|
|
141
141
|
"description": "Record agent outcomes on subagent completion (complements PostToolUse Agent matcher)"
|
|
142
142
|
}
|
|
143
143
|
],
|
|
144
|
+
"PostCompact": [
|
|
145
|
+
{
|
|
146
|
+
"matcher": "*",
|
|
147
|
+
"hooks": [
|
|
148
|
+
{
|
|
149
|
+
"type": "prompt",
|
|
150
|
+
"prompt": "Context was just compacted. MANDATORY post-compaction steps: 1) All enforced rules (R007 Agent ID, R008 Tool ID, R009 Parallel, R010 Orchestrator, R018 Agent Teams) remain ACTIVE — compaction does NOT disable rules. 2) Your next response MUST include agent identification per R007. 3) Continue delegating ALL file modifications to subagents per R010. 4) Re-read CLAUDE.md if needed to refresh project-specific context."
|
|
151
|
+
}
|
|
152
|
+
],
|
|
153
|
+
"description": "Reinforce enforced rules after context compaction — prevents rule amnesia (v2.1.76+)"
|
|
154
|
+
}
|
|
155
|
+
],
|
|
144
156
|
"PostToolUse": [
|
|
145
157
|
{
|
|
146
158
|
"matcher": "tool == \"Bash\"",
|
|
@@ -212,16 +224,6 @@
|
|
|
212
224
|
],
|
|
213
225
|
"description": "Type check Python files with ty after edits"
|
|
214
226
|
},
|
|
215
|
-
{
|
|
216
|
-
"matcher": "tool == \"Task\" || tool == \"Agent\"",
|
|
217
|
-
"hooks": [
|
|
218
|
-
{
|
|
219
|
-
"type": "command",
|
|
220
|
-
"command": "bash .claude/hooks/scripts/task-outcome-recorder.sh"
|
|
221
|
-
}
|
|
222
|
-
],
|
|
223
|
-
"description": "Record agent/task outcomes (success/failure) for model escalation decisions"
|
|
224
|
-
},
|
|
225
227
|
{
|
|
226
228
|
"matcher": "tool == \"Edit\" || tool == \"Write\" || tool == \"Bash\" || tool == \"Task\" || tool == \"Agent\" || tool == \"Read\" || tool == \"Glob\" || tool == \"Grep\"",
|
|
227
229
|
"hooks": [
|
|
@@ -294,6 +296,16 @@
|
|
|
294
296
|
],
|
|
295
297
|
"description": "Final console.log audit and session diagnostics before session ends"
|
|
296
298
|
},
|
|
299
|
+
{
|
|
300
|
+
"matcher": "*",
|
|
301
|
+
"hooks": [
|
|
302
|
+
{
|
|
303
|
+
"type": "command",
|
|
304
|
+
"command": "bash .claude/hooks/scripts/eval-core-batch-save.sh"
|
|
305
|
+
}
|
|
306
|
+
],
|
|
307
|
+
"description": "Batch-save agent outcomes to eval-core DB on session end (advisory, exit 0)"
|
|
308
|
+
},
|
|
297
309
|
{
|
|
298
310
|
"matcher": "*",
|
|
299
311
|
"hooks": [
|
|
@@ -302,7 +314,7 @@
|
|
|
302
314
|
"prompt": "Session-end memory checkpoint (R011 enforcement). Check conversation history for these 2 steps: 1) sys-memory-keeper was delegated to update MEMORY.md 2) claude-mem save was attempted via ToolSearch + mcp__plugin_claude-mem_mcp-search__save_memory. Note: episodic-memory auto-indexes after session — no manual verification needed. Decision rules: If BOTH were attempted (success or failure both count): approve. If MCP tools are unavailable after ToolSearch attempt: approve with note. If session had no explicit session-end signal from user (quick question, no memory work): approve. If any step was NOT attempted despite user signaling session end: block with systemMessage listing the missing steps."
|
|
303
315
|
}
|
|
304
316
|
],
|
|
305
|
-
"description": "Enforce R011 session-end memory saves — block stop if claude-mem
|
|
317
|
+
"description": "Enforce R011 session-end memory saves — block stop if claude-mem save was skipped (episodic-memory auto-indexes)"
|
|
306
318
|
}
|
|
307
319
|
]
|
|
308
320
|
}
|
|
@@ -29,7 +29,7 @@ read_count=${read_count:-0}
|
|
|
29
29
|
agent_count=${agent_count:-0}
|
|
30
30
|
|
|
31
31
|
# Determine tool type from input
|
|
32
|
-
TOOL=$(echo "$input" | jq -r '.
|
|
32
|
+
TOOL=$(echo "$input" | jq -r '.tool_name // ""' 2>/dev/null || echo "")
|
|
33
33
|
tool_count=$((tool_count + 1))
|
|
34
34
|
|
|
35
35
|
case "$TOOL" in
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
set -euo pipefail
|
|
3
|
+
|
|
4
|
+
# Eval-Core Batch Save on Session End (Advisory Only)
|
|
5
|
+
# Trigger: Stop hook
|
|
6
|
+
# Purpose: Auto-collect eval metrics on session end via eval-core CLI
|
|
7
|
+
# Protocol: stdin JSON -> process -> stdout pass-through, exit 0 always
|
|
8
|
+
#
|
|
9
|
+
# This hook is advisory-only and never blocks session termination.
|
|
10
|
+
# If eval-core is unavailable or collection fails, the session continues normally.
|
|
11
|
+
|
|
12
|
+
input=$(cat)
|
|
13
|
+
PPID_FILE="/tmp/.claude-task-outcomes-${PPID}"
|
|
14
|
+
|
|
15
|
+
# Only attempt collection if outcome file exists and eval-core is available
|
|
16
|
+
if [ -f "$PPID_FILE" ] && command -v eval-core >/dev/null 2>&1; then
|
|
17
|
+
echo "[Hook] Collecting eval metrics via eval-core..." >&2
|
|
18
|
+
eval-core collect --ppid "$PPID" 2>/dev/null || true
|
|
19
|
+
fi
|
|
20
|
+
|
|
21
|
+
# Always pass through input and exit 0 (advisory only)
|
|
22
|
+
echo "$input"
|
|
23
|
+
exit 0
|
|
@@ -82,6 +82,20 @@ if command -v git >/dev/null 2>&1 && git rev-parse --is-inside-work-tree >/dev/n
|
|
|
82
82
|
fi
|
|
83
83
|
fi
|
|
84
84
|
|
|
85
|
+
# --- CI Status Check ---
|
|
86
|
+
# Check last CI run status if gh CLI is available
|
|
87
|
+
if command -v gh &>/dev/null; then
|
|
88
|
+
ci_status=$(gh run list --limit 1 --json conclusion -q '.[0].conclusion' 2>/dev/null || echo "unknown")
|
|
89
|
+
ci_name=$(gh run list --limit 1 --json name -q '.[0].name' 2>/dev/null || echo "unknown")
|
|
90
|
+
if [ "$ci_status" = "failure" ]; then
|
|
91
|
+
echo "[Session] ⚠ WARNING: Last CI run FAILED (${ci_name}) — check before pushing" >&2
|
|
92
|
+
elif [ "$ci_status" = "success" ]; then
|
|
93
|
+
echo "[Session] CI: last run passed (${ci_name})" >&2
|
|
94
|
+
elif [ "$ci_status" != "unknown" ]; then
|
|
95
|
+
echo "[Session] CI: last run status: ${ci_status} (${ci_name})" >&2
|
|
96
|
+
fi
|
|
97
|
+
fi
|
|
98
|
+
|
|
85
99
|
# Update availability check (local cache only — no network calls)
|
|
86
100
|
OMCUSTOM_UPDATE_STATUS="unknown"
|
|
87
101
|
INSTALLED_VERSION=""
|
|
@@ -173,6 +187,12 @@ case "$DRIFT_STATUS" in
|
|
|
173
187
|
esac
|
|
174
188
|
echo "------------------------------------" >&2
|
|
175
189
|
|
|
190
|
+
# SessionEnd hooks timeout (v2.1.74+)
|
|
191
|
+
if [ -z "${CLAUDE_CODE_SESSIONEND_HOOKS_TIMEOUT_MS:-}" ]; then
|
|
192
|
+
echo "[SessionEnv] ⚠ CLAUDE_CODE_SESSIONEND_HOOKS_TIMEOUT_MS not set (default: 1500ms)" >&2
|
|
193
|
+
echo "[SessionEnv] Recommend: export CLAUDE_CODE_SESSIONEND_HOOKS_TIMEOUT_MS=10000" >&2
|
|
194
|
+
fi
|
|
195
|
+
|
|
176
196
|
# Update Check report
|
|
177
197
|
echo "" >&2
|
|
178
198
|
echo " [Update Check]" >&2
|
|
@@ -9,7 +9,7 @@ set -euo pipefail
|
|
|
9
9
|
# - exit 1: hard block (extreme stuck loops, >= HARD_BLOCK_THRESHOLD repetitions)
|
|
10
10
|
|
|
11
11
|
# Hard block threshold: consecutive identical operations before blocking
|
|
12
|
-
HARD_BLOCK_THRESHOLD
|
|
12
|
+
HARD_BLOCK_THRESHOLD=${CLAUDE_STUCK_THRESHOLD:-3}
|
|
13
13
|
|
|
14
14
|
input=$(cat)
|
|
15
15
|
|
|
@@ -13,6 +13,17 @@ agent_type=$(echo "$input" | jq -r '.tool_input.subagent_type // .agent_type //
|
|
|
13
13
|
model=$(echo "$input" | jq -r '.tool_input.model // .model // "inherit"')
|
|
14
14
|
description=$(echo "$input" | jq -r '.tool_input.description // .description // ""' | head -c 80)
|
|
15
15
|
|
|
16
|
+
# Extract skill name from description or prompt
|
|
17
|
+
skill_name=""
|
|
18
|
+
if echo "$description" | grep -qiE '(skill:|routing|→.*skill)'; then
|
|
19
|
+
skill_name=$(echo "$description" | grep -oiE '[a-z]+-[a-z]+(-[a-z]+)*-?(routing|skill|practices|detection|decomposition|orchestration|pipeline|guards|cycle|plan|review|refactor|publish|version|audit|exec|analyze|bundle|report|setup|watch|lists|status|help|save|recall)' | head -1)
|
|
20
|
+
fi
|
|
21
|
+
# Fallback: check prompt field for "Skill: {name}" pattern
|
|
22
|
+
if [ -z "$skill_name" ]; then
|
|
23
|
+
prompt=$(echo "$input" | jq -r '.tool_input.prompt // ""' | head -c 500)
|
|
24
|
+
skill_name=$(echo "$prompt" | grep -oiE 'Skill:\s*[a-z]+-[a-z]+(-[a-z]+)*' | sed 's/[Ss]kill:\s*//' | head -1)
|
|
25
|
+
fi
|
|
26
|
+
|
|
16
27
|
# Determine outcome
|
|
17
28
|
is_error=$(echo "$input" | jq -r '.tool_output.is_error // false')
|
|
18
29
|
|
|
@@ -61,9 +72,10 @@ entry=$(jq -n \
|
|
|
61
72
|
--arg model "$model" \
|
|
62
73
|
--arg outcome "$outcome" \
|
|
63
74
|
--arg pattern "$pattern" \
|
|
75
|
+
--arg skill "$skill_name" \
|
|
64
76
|
--arg desc "$description" \
|
|
65
77
|
--arg err "$error_summary" \
|
|
66
|
-
'{timestamp: $ts, agent_type: $agent, model: $model, outcome: $outcome, pattern_used: $pattern, description: $desc, error_summary: $err}')
|
|
78
|
+
'{timestamp: $ts, agent_type: $agent, model: $model, outcome: $outcome, pattern_used: $pattern, skill: $skill, description: $desc, error_summary: $err}')
|
|
67
79
|
|
|
68
80
|
echo "$entry" >> "$OUTCOME_FILE"
|
|
69
81
|
|
|
@@ -27,3 +27,15 @@
|
|
|
27
27
|
| Repetitive tasks, clear bottleneck, measurable gain | One-time tasks, already fast, complexity > benefit |
|
|
28
28
|
|
|
29
29
|
Readability > Optimization. No optimization without measurement.
|
|
30
|
+
|
|
31
|
+
## CLAUDE.md Context Optimization (v2.1.72+)
|
|
32
|
+
|
|
33
|
+
HTML comments in CLAUDE.md are hidden from the model during auto-injection but remain visible via the Read tool.
|
|
34
|
+
|
|
35
|
+
| Use Case | Example |
|
|
36
|
+
|----------|---------|
|
|
37
|
+
| Metadata tags | `<!-- agents: 44, skills: 74 -->` |
|
|
38
|
+
| Validation checksums | `<!-- validate-docs: hash=abc123 -->` |
|
|
39
|
+
| Conditional context | `<!-- detailed-architecture: see guides/architecture/ -->` |
|
|
40
|
+
|
|
41
|
+
**Rule**: Move model-unnecessary metadata into HTML comments to reduce context token usage. Keep actionable instructions as visible text.
|
|
@@ -11,7 +11,7 @@ Location: `.claude/agents/{name}.md` (single file, kebab-case)
|
|
|
11
11
|
```yaml
|
|
12
12
|
name: agent-name # Unique identifier (kebab-case)
|
|
13
13
|
description: Brief desc # One-line summary
|
|
14
|
-
model: sonnet # sonnet | opus | haiku
|
|
14
|
+
model: sonnet # sonnet | opus | haiku (or full ID: claude-sonnet-4-6)
|
|
15
15
|
tools: [Read, Write, ...] # Allowed tools
|
|
16
16
|
```
|
|
17
17
|
|
|
@@ -31,9 +31,10 @@ escalation: # Model escalation policy (optional)
|
|
|
31
31
|
path: haiku → sonnet → opus # Escalation sequence
|
|
32
32
|
threshold: 2 # Failures before advisory
|
|
33
33
|
soul: true # Enable SOUL.md identity injection
|
|
34
|
-
isolation: worktree
|
|
34
|
+
isolation: worktree # worktree | sandbox (worktree = git worktree, sandbox = restricted bash)
|
|
35
35
|
background: true # Run in background
|
|
36
36
|
maxTurns: 10 # Max conversation turns
|
|
37
|
+
maxTokens: 100000 # Per-turn token ceiling
|
|
37
38
|
mcpServers: [server-1] # MCP servers available
|
|
38
39
|
hooks: # Agent-specific hooks
|
|
39
40
|
PreToolUse:
|
|
@@ -41,9 +42,33 @@ hooks: # Agent-specific hooks
|
|
|
41
42
|
command: "echo hook"
|
|
42
43
|
permissionMode: bypassPermissions # Permission mode
|
|
43
44
|
disallowedTools: [Bash] # Tools to disallow
|
|
45
|
+
limitations: # Negative capability declarations
|
|
46
|
+
- "cannot execute tests"
|
|
47
|
+
- "cannot modify code"
|
|
48
|
+
domain: backend # backend | frontend | data-engineering | devops | universal
|
|
44
49
|
```
|
|
45
50
|
|
|
46
|
-
> **Note**: `isolation`, `background`, `maxTurns`, `mcpServers`, `hooks`, `permissionMode`, `disallowedTools` are supported in Claude Code v2.1.63+.
|
|
51
|
+
> **Note**: `isolation`, `background`, `maxTurns`, `maxTokens`, `mcpServers`, `hooks`, `permissionMode`, `disallowedTools`, `limitations` are supported in Claude Code v2.1.63+. Hook types `PostCompact`, `Elicitation`, `ElicitationResult` require v2.1.76+.
|
|
52
|
+
|
|
53
|
+
### Isolation Modes
|
|
54
|
+
|
|
55
|
+
| Mode | Behavior | Use Case |
|
|
56
|
+
|------|----------|----------|
|
|
57
|
+
| `worktree` | Isolated git worktree copy | Code changes that need rollback safety |
|
|
58
|
+
| `sandbox` | Restricted Bash environment | Agents running untrusted or scan commands |
|
|
59
|
+
|
|
60
|
+
When `isolation: sandbox` is set, the agent's Bash calls are expected to run with restricted permissions. The field itself is advisory metadata — actual enforcement depends on the execution environment.
|
|
61
|
+
|
|
62
|
+
### Token Ceiling
|
|
63
|
+
|
|
64
|
+
When `maxTokens` is set, it serves as advisory metadata for the orchestrator to manage agent turn budgets. The orchestrator should track output and consider escalation or task splitting when an agent approaches its ceiling.
|
|
65
|
+
|
|
66
|
+
### Negative Capabilities (Limitations)
|
|
67
|
+
|
|
68
|
+
The `limitations` field declares what an agent explicitly CANNOT or SHOULD NOT do. This enables:
|
|
69
|
+
1. **Clearer routing**: Orchestrator knows agent boundaries
|
|
70
|
+
2. **Safer delegation**: Prevents accidental capability overreach
|
|
71
|
+
3. **Better documentation**: Makes agent scope explicit
|
|
47
72
|
|
|
48
73
|
### Escalation Policy
|
|
49
74
|
|
|
@@ -144,6 +169,19 @@ user-invocable: false # Whether user can invoke directly
|
|
|
144
169
|
disable-model-invocation: true # Prevent model from auto-invoking
|
|
145
170
|
```
|
|
146
171
|
|
|
172
|
+
### Skill Effectiveness Tracking
|
|
173
|
+
|
|
174
|
+
Skills can optionally track effectiveness metrics via auto-populated fields:
|
|
175
|
+
|
|
176
|
+
```yaml
|
|
177
|
+
effectiveness: # Auto-populated by sys-memory-keeper
|
|
178
|
+
invocations: 0 # Total invocation count across sessions
|
|
179
|
+
success_rate: 0.0 # Success rate (0.0-1.0)
|
|
180
|
+
last_invoked: "" # ISO-8601 timestamp
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
These fields are read-only from the skill's perspective — sys-memory-keeper updates them at session end based on task-outcome-recorder data. They inform model selection, routing optimization, and skill maintenance priorities.
|
|
184
|
+
|
|
147
185
|
## Skill Scope
|
|
148
186
|
|
|
149
187
|
| Scope | Purpose | Deployed via init? |
|
|
@@ -156,7 +194,7 @@ Default: `core` (when field is omitted)
|
|
|
156
194
|
|
|
157
195
|
### Context Fork Criteria
|
|
158
196
|
|
|
159
|
-
Use `context: fork` for skills that orchestrate multi-agent workflows. Cap at **
|
|
197
|
+
Use `context: fork` for skills that orchestrate multi-agent workflows. Cap at **12 total** across the project.
|
|
160
198
|
|
|
161
199
|
| Use `context: fork` | Do NOT use `context: fork` |
|
|
162
200
|
|---------------------|---------------------------|
|
|
@@ -165,10 +203,10 @@ Use `context: fork` for skills that orchestrate multi-agent workflows. Cap at **
|
|
|
165
203
|
| Multi-agent coordination patterns | Single-agent reference skills |
|
|
166
204
|
| Task decomposition/planning | External tool integrations |
|
|
167
205
|
|
|
168
|
-
Current skills with `context: fork` (
|
|
206
|
+
Current skills with `context: fork` (11/12 cap):
|
|
169
207
|
- secretary-routing, dev-lead-routing, de-lead-routing, qa-lead-routing
|
|
170
|
-
- dag-orchestration, task-decomposition, worker-reviewer-pipeline
|
|
171
|
-
-
|
|
208
|
+
- dag-orchestration, task-decomposition, worker-reviewer-pipeline, pipeline-guards
|
|
209
|
+
- deep-plan, evaluator-optimizer, sauron-watch
|
|
172
210
|
|
|
173
211
|
## Naming
|
|
174
212
|
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# [MUST] Completion Verification Rules
|
|
2
|
+
|
|
3
|
+
> **Priority**: MUST | **ID**: R020
|
|
4
|
+
|
|
5
|
+
## Core Rule
|
|
6
|
+
|
|
7
|
+
Before declaring any task `[Done]`, verify completion against task-type-specific criteria. False completion declarations erode trust and cause downstream failures.
|
|
8
|
+
|
|
9
|
+
## Task-Type Completion Matrix
|
|
10
|
+
|
|
11
|
+
| Task Type | REQUIRED Verification Before [Done] |
|
|
12
|
+
|-----------|-------------------------------------|
|
|
13
|
+
| Release | All issues closed, version bumped, PR merged, GitHub Release created |
|
|
14
|
+
| Implementation | Code compiles/passes lint, tests pass (if exist), no TODO markers left |
|
|
15
|
+
| Documentation | Links valid, counts accurate, cross-references updated |
|
|
16
|
+
| Git Operations | Operation succeeded (check exit code), working tree clean |
|
|
17
|
+
| Code Review | All findings addressed or explicitly deferred with justification |
|
|
18
|
+
| Agent/Skill Creation | Frontmatter valid, referenced skills exist, routing updated |
|
|
19
|
+
|
|
20
|
+
## Self-Check (Before Declaring Done)
|
|
21
|
+
|
|
22
|
+
```
|
|
23
|
+
╔══════════════════════════════════════════════════════════════════╗
|
|
24
|
+
║ BEFORE DECLARING [Done], ASK YOURSELF: ║
|
|
25
|
+
║ ║
|
|
26
|
+
║ 1. Did I verify the ACTUAL outcome (not just attempt)? ║
|
|
27
|
+
║ "I ran the command" ≠ "the command succeeded" ║
|
|
28
|
+
║ YES → Continue ║
|
|
29
|
+
║ NO → Verify outcome first ║
|
|
30
|
+
║ ║
|
|
31
|
+
║ 2. Does the task type have specific criteria above? ║
|
|
32
|
+
║ YES → Check each criterion ║
|
|
33
|
+
║ NO → Apply general verification ║
|
|
34
|
+
║ ║
|
|
35
|
+
║ 3. Are there any unchecked items in the task's checklist? ║
|
|
36
|
+
║ YES → Complete them or explicitly defer with reason ║
|
|
37
|
+
║ NO → Good. Proceed to [Done] ║
|
|
38
|
+
║ ║
|
|
39
|
+
║ 4. Would I bet $100 this task is truly complete? ║
|
|
40
|
+
║ YES → Declare [Done] ║
|
|
41
|
+
║ NO → Identify what's uncertain and verify ║
|
|
42
|
+
╚══════════════════════════════════════════════════════════════════╝
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Common False Completion Patterns
|
|
46
|
+
|
|
47
|
+
| Pattern | Reality | Fix |
|
|
48
|
+
|---------|---------|-----|
|
|
49
|
+
| "Command executed" | Exit code not checked | Check `$?` or tool output |
|
|
50
|
+
| "File created" | Content not verified | Read file back, verify content |
|
|
51
|
+
| "PR created" | CI not checked | Wait for CI, verify green |
|
|
52
|
+
| "Issue closed" | Related issues not updated | Check parent epic, cross-refs |
|
|
53
|
+
| "Tests pass" | Only ran subset | Run full test suite |
|
|
54
|
+
|
|
55
|
+
## Completion Contract Format
|
|
56
|
+
|
|
57
|
+
For complex tasks, declare a completion contract upfront:
|
|
58
|
+
|
|
59
|
+
```
|
|
60
|
+
[Contract] Task: {name}
|
|
61
|
+
├── Criterion 1: {specific, verifiable condition}
|
|
62
|
+
├── Criterion 2: {specific, verifiable condition}
|
|
63
|
+
└── Criterion N: {specific, verifiable condition}
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Then at completion:
|
|
67
|
+
|
|
68
|
+
```
|
|
69
|
+
[Done] Task: {name}
|
|
70
|
+
├── ✓ Criterion 1: {evidence}
|
|
71
|
+
├── ✓ Criterion 2: {evidence}
|
|
72
|
+
└── ✓ Criterion N: {evidence}
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Integration
|
|
76
|
+
|
|
77
|
+
| Rule | Interaction |
|
|
78
|
+
|------|-------------|
|
|
79
|
+
| R003 | [Done] status format now requires verification evidence |
|
|
80
|
+
| R010 | Orchestrator verifies subagent completion claims |
|
|
81
|
+
| R017 | Structural changes require sauron verification before [Done] |
|
|
@@ -14,6 +14,7 @@ Rule: If native auto memory can handle it, do NOT use claude-mem.
|
|
|
14
14
|
Agent frontmatter `memory: project|user|local` enables persistent memory:
|
|
15
15
|
- System creates memory directory, loads first 200 lines of MEMORY.md into prompt
|
|
16
16
|
- Read/Write/Edit tools auto-enabled for memory directory
|
|
17
|
+
- Custom directory: set `autoMemoryDirectory` in settings to override default paths (v2.1.74+)
|
|
17
18
|
|
|
18
19
|
| Scope | Location | Git Tracked |
|
|
19
20
|
|-------|----------|-------------|
|
|
@@ -71,6 +72,45 @@ Memory entries in MEMORY.md should include confidence annotations to distinguish
|
|
|
71
72
|
[any] → contradicted by evidence → demoted or removed
|
|
72
73
|
```
|
|
73
74
|
|
|
75
|
+
### Temporal Decay
|
|
76
|
+
|
|
77
|
+
Memory entries include an optional verification timestamp for decay tracking:
|
|
78
|
+
|
|
79
|
+
**Format**: `[confidence: high, verified: 2026-03-15]`
|
|
80
|
+
|
|
81
|
+
| Age (unverified) | Action |
|
|
82
|
+
|-------------------|--------|
|
|
83
|
+
| 0-30 days | No change — entry is fresh |
|
|
84
|
+
| 31-60 days | Demote one level (high→medium, medium→low) |
|
|
85
|
+
| 61-90 days | Demote again if not re-verified |
|
|
86
|
+
| 91+ days | Removal candidate — flag for review |
|
|
87
|
+
|
|
88
|
+
**Decay Schedule**:
|
|
89
|
+
```
|
|
90
|
+
Day 0: [confidence: high, verified: 2026-03-15]
|
|
91
|
+
Day 30: [confidence: high, verified: 2026-03-15] ← still within window
|
|
92
|
+
Day 31: [confidence: medium, verified: 2026-03-15] ← auto-demoted
|
|
93
|
+
Day 61: [confidence: low, verified: 2026-03-15] ← demoted again
|
|
94
|
+
Day 91: [REVIEW NEEDED, verified: 2026-03-15] ← flagged
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
**Re-verification**: Any session that confirms a memory entry resets the verified date:
|
|
98
|
+
```
|
|
99
|
+
Before: [confidence: medium, verified: 2026-01-15]
|
|
100
|
+
Action: Pattern confirmed in session
|
|
101
|
+
After: [confidence: high, verified: 2026-03-15]
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
**Enforcement**: sys-memory-keeper checks decay at session start and end:
|
|
105
|
+
1. Session start: scan MEMORY.md for entries past decay threshold
|
|
106
|
+
2. Flag stale entries with `[STALE]` prefix
|
|
107
|
+
3. Session end: remove or demote unconfirmed stale entries
|
|
108
|
+
|
|
109
|
+
**Exceptions**: Entries marked `[permanent]` are exempt from decay:
|
|
110
|
+
```
|
|
111
|
+
### Architecture Decisions [confidence: high, permanent]
|
|
112
|
+
```
|
|
113
|
+
|
|
74
114
|
## Behavioral Memory
|
|
75
115
|
|
|
76
116
|
MEMORY.md supports an optional `## Behaviors` section for tracking user interaction preferences and workflow patterns.
|
|
@@ -140,6 +180,47 @@ When sys-memory-keeper updates MEMORY.md at session end:
|
|
|
140
180
|
2. Findings that match existing entries → promote confidence
|
|
141
181
|
3. Findings that contradict existing entries → flag for review
|
|
142
182
|
|
|
183
|
+
## Agent Metrics
|
|
184
|
+
|
|
185
|
+
MEMORY.md supports an optional `## Metrics` section for tracking per-agent-type performance data.
|
|
186
|
+
|
|
187
|
+
### Metrics Section Format
|
|
188
|
+
|
|
189
|
+
```markdown
|
|
190
|
+
## Metrics [auto-updated by sys-memory-keeper]
|
|
191
|
+
|
|
192
|
+
| Agent Type | Tasks | Success Rate | Avg Model | Last Used |
|
|
193
|
+
|------------|-------|-------------|-----------|-----------|
|
|
194
|
+
| lang-golang-expert | 12 | 92% | sonnet | 2026-03-15 |
|
|
195
|
+
| mgr-gitnerd | 8 | 100% | sonnet | 2026-03-15 |
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
### Metrics Collection
|
|
199
|
+
|
|
200
|
+
sys-memory-keeper aggregates metrics at session end:
|
|
201
|
+
|
|
202
|
+
1. Read `/tmp/.claude-task-outcomes-${PPID}` (JSONL from task-outcome-recorder hook)
|
|
203
|
+
2. Parse each entry: `{agent_type, outcome, model, timestamp}`
|
|
204
|
+
3. Aggregate by agent_type: total tasks, success count, model distribution
|
|
205
|
+
4. Merge with existing Metrics table in MEMORY.md
|
|
206
|
+
5. Budget: max 20 rows (prune lowest-usage agents when exceeded)
|
|
207
|
+
|
|
208
|
+
### Metrics Fields
|
|
209
|
+
|
|
210
|
+
| Field | Source | Calculation |
|
|
211
|
+
|-------|--------|-------------|
|
|
212
|
+
| Tasks | task-outcome-recorder JSONL | Count of entries per agent_type |
|
|
213
|
+
| Success Rate | outcome field | `success_count / total_count * 100` |
|
|
214
|
+
| Avg Model | model field | Most frequently used model |
|
|
215
|
+
| Last Used | timestamp field | Most recent invocation |
|
|
216
|
+
|
|
217
|
+
### Budget Management
|
|
218
|
+
|
|
219
|
+
The Metrics section shares the 200-line MEMORY.md budget:
|
|
220
|
+
1. Max 20 agent rows in Metrics table
|
|
221
|
+
2. When adding a new agent would exceed the 20-row cap, prune the agent with the lowest task count
|
|
222
|
+
3. Merge identical agent types across sessions (cumulative)
|
|
223
|
+
|
|
143
224
|
## Session-End Auto-Save
|
|
144
225
|
|
|
145
226
|
### Trigger
|
|
@@ -65,8 +65,11 @@ Check if Agent Teams is available (`CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1` or T
|
|
|
65
65
|
For **new pipeline code**, **DAG scaffolding**, or **SQL model generation**:
|
|
66
66
|
|
|
67
67
|
1. Check `/tmp/.claude-env-status-*` for codex availability
|
|
68
|
-
2. If codex available →
|
|
69
|
-
|
|
68
|
+
2. If codex available AND task involves new file creation → automatically delegate to `/codex-exec` for scaffolding:
|
|
69
|
+
- Display: `[Codex Hybrid] Delegating to codex-exec...`
|
|
70
|
+
- codex-exec generates initial code (strength: fast generation)
|
|
71
|
+
- Selected DE expert reviews and refines codex output (strength: reasoning, quality)
|
|
72
|
+
3. If codex unavailable → display `[Codex] Unavailable — proceeding with {expert} directly` and use DE expert directly
|
|
70
73
|
|
|
71
74
|
**Suitable**: New DAG files, dbt model scaffolding, SQL template generation
|
|
72
75
|
**Unsuitable**: Existing pipeline modification, architecture decisions, data quality analysis
|
|
@@ -76,26 +79,11 @@ Route to appropriate DE expert based on tool/framework detection.
|
|
|
76
79
|
|
|
77
80
|
### Step 4: Ontology-RAG Enrichment (R019)
|
|
78
81
|
|
|
79
|
-
|
|
82
|
+
If `get_agent_for_task` MCP tool is available, call it with the original query and inject `suggested_skills` into the agent prompt. Skip silently on failure.
|
|
80
83
|
|
|
81
|
-
|
|
82
|
-
2. Extract `suggested_skills` from response
|
|
83
|
-
3. If `suggested_skills` non-empty, prepend to spawned agent prompt:
|
|
84
|
-
`"Ontology context suggests these skills may be relevant: {suggested_skills}"`
|
|
85
|
-
4. On MCP failure: skip silently, proceed with unmodified prompt
|
|
84
|
+
### Step 5: Soul Injection (R006)
|
|
86
85
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
### Step 5: Soul Injection
|
|
90
|
-
|
|
91
|
-
If the selected agent has `soul: true` in its frontmatter:
|
|
92
|
-
|
|
93
|
-
1. Read `.claude/agents/souls/{agent-name}.soul.md`
|
|
94
|
-
2. If file exists, prepend soul content to the agent's prompt:
|
|
95
|
-
`"Identity context:\n{soul content}\n\n---\n\n"`
|
|
96
|
-
3. If file doesn't exist → skip silently (no error, no injection)
|
|
97
|
-
|
|
98
|
-
**This step runs after ontology-RAG enrichment. Soul content is identity context, not capability instructions.**
|
|
86
|
+
If the selected agent has `soul: true` in frontmatter, read and prepend `.claude/agents/souls/{agent-name}.soul.md` content to the prompt. Skip silently if file doesn't exist.
|
|
99
87
|
|
|
100
88
|
## Command Routing
|
|
101
89
|
|
|
@@ -199,78 +187,6 @@ For projects spanning multiple DE tools:
|
|
|
199
187
|
| de-kafka-expert | `sonnet` | `opus` for topology design |
|
|
200
188
|
| de-snowflake-expert | `sonnet` | `opus` for warehouse design |
|
|
201
189
|
|
|
202
|
-
### Agent Call Examples
|
|
203
|
-
|
|
204
|
-
```
|
|
205
|
-
# Complex pipeline architecture
|
|
206
|
-
Agent(
|
|
207
|
-
subagent_type: "general-purpose",
|
|
208
|
-
prompt: "Design end-to-end pipeline architecture following de-pipeline-expert guidelines",
|
|
209
|
-
model: "opus"
|
|
210
|
-
)
|
|
211
|
-
|
|
212
|
-
# Standard DAG review
|
|
213
|
-
Agent(
|
|
214
|
-
subagent_type: "general-purpose",
|
|
215
|
-
prompt: "Review Airflow DAGs in dags/ following de-airflow-expert guidelines",
|
|
216
|
-
model: "sonnet"
|
|
217
|
-
)
|
|
218
|
-
|
|
219
|
-
# Quick dbt test validation
|
|
220
|
-
Agent(
|
|
221
|
-
subagent_type: "Explore",
|
|
222
|
-
prompt: "Find all dbt models missing schema tests",
|
|
223
|
-
model: "haiku"
|
|
224
|
-
)
|
|
225
|
-
```
|
|
226
|
-
|
|
227
|
-
## Parallel Execution
|
|
228
|
-
|
|
229
|
-
Following R009:
|
|
230
|
-
- Maximum 4 parallel instances
|
|
231
|
-
- Independent tool/module operations
|
|
232
|
-
- Coordinate cross-tool consistency
|
|
233
|
-
|
|
234
|
-
Example:
|
|
235
|
-
```
|
|
236
|
-
User: "Review all DE configs"
|
|
237
|
-
|
|
238
|
-
Detection:
|
|
239
|
-
- dags/ → de-airflow-expert
|
|
240
|
-
- models/ → de-dbt-expert
|
|
241
|
-
- kafka/ → de-kafka-expert
|
|
242
|
-
|
|
243
|
-
Route (parallel):
|
|
244
|
-
Agent(de-airflow-expert role → review dags/, model: "sonnet")
|
|
245
|
-
Agent(de-dbt-expert role → review models/, model: "sonnet")
|
|
246
|
-
Agent(de-kafka-expert role → review kafka/, model: "sonnet")
|
|
247
|
-
```
|
|
248
|
-
|
|
249
|
-
## Display Format
|
|
250
|
-
|
|
251
|
-
```
|
|
252
|
-
[Analyzing] Detected: Airflow, dbt, Snowflake
|
|
253
|
-
|
|
254
|
-
[Delegating] de-airflow-expert:sonnet → DAG design
|
|
255
|
-
[Delegating] de-dbt-expert:sonnet → Model structure
|
|
256
|
-
[Delegating] de-snowflake-expert:sonnet → Warehouse config
|
|
257
|
-
|
|
258
|
-
[Progress] ███████████░ 2/3 experts completed
|
|
259
|
-
|
|
260
|
-
[Summary]
|
|
261
|
-
Airflow: DAG with 5 tasks designed
|
|
262
|
-
dbt: 12 models across 3 layers
|
|
263
|
-
Snowflake: Warehouse + schema configured
|
|
264
|
-
|
|
265
|
-
Pipeline design completed.
|
|
266
|
-
```
|
|
267
|
-
|
|
268
|
-
## Integration with Other Routing Skills
|
|
269
|
-
|
|
270
|
-
- **dev-lead-routing**: Hands off to DE lead when data engineering keywords detected
|
|
271
|
-
- **secretary-routing**: DE agents accessible through secretary for management tasks
|
|
272
|
-
- **qa-lead-routing**: Coordinates with QA for data quality testing
|
|
273
|
-
|
|
274
190
|
## No Match Fallback
|
|
275
191
|
|
|
276
192
|
When a data engineering tool is detected but no matching agent exists:
|