oh-my-customcode 0.37.2 → 0.39.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -5
- package/dist/cli/index.js +1189 -99
- package/dist/index.js +4 -1
- package/package.json +3 -1
- package/templates/.claude/hooks/hooks.json +24 -12
- package/templates/.claude/hooks/scripts/agent-teams-advisor.sh +3 -0
- package/templates/.claude/hooks/scripts/audit-log.sh +3 -0
- package/templates/.claude/hooks/scripts/content-hash-validator.sh +3 -0
- package/templates/.claude/hooks/scripts/context-budget-advisor.sh +4 -1
- package/templates/.claude/hooks/scripts/eval-core-batch-save.sh +23 -0
- package/templates/.claude/hooks/scripts/git-delegation-guard.sh +3 -0
- package/templates/.claude/hooks/scripts/model-escalation-advisor.sh +3 -0
- package/templates/.claude/hooks/scripts/schema-validator.sh +3 -0
- package/templates/.claude/hooks/scripts/secret-filter.sh +3 -0
- package/templates/.claude/hooks/scripts/session-env-check.sh +20 -0
- package/templates/.claude/hooks/scripts/stuck-detector.sh +4 -1
- package/templates/.claude/hooks/scripts/task-outcome-recorder.sh +16 -1
- package/templates/.claude/rules/MAY-optimization.md +12 -0
- package/templates/.claude/rules/MUST-agent-design.md +45 -7
- package/templates/.claude/rules/MUST-completion-verification.md +81 -0
- package/templates/.claude/rules/SHOULD-memory-integration.md +81 -0
- package/templates/.claude/skills/de-lead-routing/SKILL.md +8 -92
- package/templates/.claude/skills/deep-plan/SKILL.md +55 -4
- package/templates/.claude/skills/dev-lead-routing/SKILL.md +9 -21
- package/templates/.claude/skills/dev-refactor/SKILL.md +34 -1
- package/templates/.claude/skills/evaluator-optimizer/SKILL.md +53 -0
- package/templates/.claude/skills/qa-lead-routing/SKILL.md +7 -242
- package/templates/.claude/skills/research/SKILL.md +74 -7
- package/templates/.claude/skills/sauron-watch/SKILL.md +81 -0
- package/templates/.claude/skills/secretary-routing/SKILL.md +3 -18
- package/templates/.claude/skills/structured-dev-cycle/SKILL.md +20 -3
- package/templates/guides/claude-code/index.yaml +5 -0
- package/templates/manifest.json +3 -3
- package/templates/.claude/hooks/scripts/session-compliance-report.sh +0 -65
package/dist/index.js
CHANGED
|
@@ -544,7 +544,9 @@ function getDefaultConfig() {
|
|
|
544
544
|
".claude/agent-memory/",
|
|
545
545
|
".claude/agent-memory-local/"
|
|
546
546
|
],
|
|
547
|
-
customComponents: []
|
|
547
|
+
customComponents: [],
|
|
548
|
+
domain: undefined,
|
|
549
|
+
teamMode: false
|
|
548
550
|
};
|
|
549
551
|
}
|
|
550
552
|
function getDefaultPreferences() {
|
|
@@ -1387,6 +1389,7 @@ async function installEntryDocWithTracking(targetDir, options, result) {
|
|
|
1387
1389
|
async function updateInstallConfig(targetDir, options, installedComponents) {
|
|
1388
1390
|
const config = await loadConfig(targetDir);
|
|
1389
1391
|
config.language = options.language ?? DEFAULT_LANGUAGE;
|
|
1392
|
+
config.domain = options.domain;
|
|
1390
1393
|
config.installedAt = new Date().toISOString();
|
|
1391
1394
|
config.installedComponents = installedComponents;
|
|
1392
1395
|
await saveConfig(targetDir, config);
|
package/package.json
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "oh-my-customcode",
|
|
3
|
-
"
|
|
3
|
+
"workspaces": ["packages/*"],
|
|
4
|
+
"version": "0.39.0",
|
|
4
5
|
"description": "Batteries-included agent harness for Claude Code",
|
|
5
6
|
"type": "module",
|
|
6
7
|
"bin": {
|
|
@@ -41,6 +42,7 @@
|
|
|
41
42
|
"prepublishOnly": "bun run build && bun run test"
|
|
42
43
|
},
|
|
43
44
|
"dependencies": {
|
|
45
|
+
"@clack/prompts": "^1.1.0",
|
|
44
46
|
"commander": "^14.0.2",
|
|
45
47
|
"i18next": "^25.8.0",
|
|
46
48
|
"yaml": "^2.8.2"
|
|
@@ -141,6 +141,18 @@
|
|
|
141
141
|
"description": "Record agent outcomes on subagent completion (complements PostToolUse Agent matcher)"
|
|
142
142
|
}
|
|
143
143
|
],
|
|
144
|
+
"PostCompact": [
|
|
145
|
+
{
|
|
146
|
+
"matcher": "*",
|
|
147
|
+
"hooks": [
|
|
148
|
+
{
|
|
149
|
+
"type": "prompt",
|
|
150
|
+
"prompt": "Context was just compacted. MANDATORY post-compaction steps: 1) All enforced rules (R007 Agent ID, R008 Tool ID, R009 Parallel, R010 Orchestrator, R018 Agent Teams) remain ACTIVE — compaction does NOT disable rules. 2) Your next response MUST include agent identification per R007. 3) Continue delegating ALL file modifications to subagents per R010. 4) Re-read CLAUDE.md if needed to refresh project-specific context."
|
|
151
|
+
}
|
|
152
|
+
],
|
|
153
|
+
"description": "Reinforce enforced rules after context compaction — prevents rule amnesia (v2.1.76+)"
|
|
154
|
+
}
|
|
155
|
+
],
|
|
144
156
|
"PostToolUse": [
|
|
145
157
|
{
|
|
146
158
|
"matcher": "tool == \"Bash\"",
|
|
@@ -212,16 +224,6 @@
|
|
|
212
224
|
],
|
|
213
225
|
"description": "Type check Python files with ty after edits"
|
|
214
226
|
},
|
|
215
|
-
{
|
|
216
|
-
"matcher": "tool == \"Task\" || tool == \"Agent\"",
|
|
217
|
-
"hooks": [
|
|
218
|
-
{
|
|
219
|
-
"type": "command",
|
|
220
|
-
"command": "bash .claude/hooks/scripts/task-outcome-recorder.sh"
|
|
221
|
-
}
|
|
222
|
-
],
|
|
223
|
-
"description": "Record agent/task outcomes (success/failure) for model escalation decisions"
|
|
224
|
-
},
|
|
225
227
|
{
|
|
226
228
|
"matcher": "tool == \"Edit\" || tool == \"Write\" || tool == \"Bash\" || tool == \"Task\" || tool == \"Agent\" || tool == \"Read\" || tool == \"Glob\" || tool == \"Grep\"",
|
|
227
229
|
"hooks": [
|
|
@@ -294,15 +296,25 @@
|
|
|
294
296
|
],
|
|
295
297
|
"description": "Final console.log audit and session diagnostics before session ends"
|
|
296
298
|
},
|
|
299
|
+
{
|
|
300
|
+
"matcher": "*",
|
|
301
|
+
"hooks": [
|
|
302
|
+
{
|
|
303
|
+
"type": "command",
|
|
304
|
+
"command": "bash .claude/hooks/scripts/eval-core-batch-save.sh"
|
|
305
|
+
}
|
|
306
|
+
],
|
|
307
|
+
"description": "Batch-save agent outcomes to eval-core DB on session end (advisory, exit 0)"
|
|
308
|
+
},
|
|
297
309
|
{
|
|
298
310
|
"matcher": "*",
|
|
299
311
|
"hooks": [
|
|
300
312
|
{
|
|
301
313
|
"type": "prompt",
|
|
302
|
-
"prompt": "Session-end memory checkpoint (R011 enforcement). Check
|
|
314
|
+
"prompt": "Session-end memory checkpoint (R011 enforcement). Check if the session-end memory save workflow was completed: 1) sys-memory-keeper was delegated to update MEMORY.md 2) claude-mem save was attempted via ToolSearch + mcp__plugin_claude-mem_mcp-search__save_memory. Note: episodic-memory auto-indexes after session — no manual action needed. Decision rules (evaluate in order): 1. If session had no explicit session-end signal (quick question, no memory work): APPROVE immediately. 2. If BOTH steps were already attempted (success or failure both count): APPROVE. 3. If MCP tools unavailable after ToolSearch attempt: APPROVE with note. 4. If steps were NOT yet performed: PERFORM THEM NOW (maximum 1 attempt each): a. Delegate to sys-memory-keeper to update MEMORY.md — call Agent tool with sys-memory-keeper. b. Attempt claude-mem save — ToolSearch for save_memory tool, then call it. c. After completion (success or failure), APPROVE stop. IMPORTANT: Do NOT block indefinitely. After 1 attempt per step, always APPROVE. IMPORTANT: Do NOT loop — if you already performed these steps in this same stop hook evaluation, APPROVE."
|
|
303
315
|
}
|
|
304
316
|
],
|
|
305
|
-
"description": "Enforce R011 session-end memory saves —
|
|
317
|
+
"description": "Enforce R011 session-end memory saves — auto-perform if not yet done (episodic-memory auto-indexes)"
|
|
306
318
|
}
|
|
307
319
|
]
|
|
308
320
|
}
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
#!/bin/bash
|
|
2
2
|
set -euo pipefail
|
|
3
3
|
|
|
4
|
+
# Dependency check: exit silently if jq not available
|
|
5
|
+
command -v jq >/dev/null 2>&1 || exit 0
|
|
6
|
+
|
|
4
7
|
# Agent Teams Advisor Hook
|
|
5
8
|
# Trigger: PreToolUse, tool == "Task" || tool == "Agent"
|
|
6
9
|
# Purpose: Track Agent/Task tool usage count per session and warn when Agent Teams may be more appropriate
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
#!/bin/bash
|
|
2
2
|
set -euo pipefail
|
|
3
3
|
|
|
4
|
+
# Dependency check: exit silently if jq not available
|
|
5
|
+
command -v jq >/dev/null 2>&1 || exit 0
|
|
6
|
+
|
|
4
7
|
# Context Budget Advisor Hook
|
|
5
8
|
# Trigger: PostToolUse (Edit/Write/Agent/Task/Read/Glob/Grep/Bash)
|
|
6
9
|
# Purpose: Monitor context usage and advise ecomode activation based on task type
|
|
@@ -29,7 +32,7 @@ read_count=${read_count:-0}
|
|
|
29
32
|
agent_count=${agent_count:-0}
|
|
30
33
|
|
|
31
34
|
# Determine tool type from input
|
|
32
|
-
TOOL=$(echo "$input" | jq -r '.
|
|
35
|
+
TOOL=$(echo "$input" | jq -r '.tool_name // ""' 2>/dev/null || echo "")
|
|
33
36
|
tool_count=$((tool_count + 1))
|
|
34
37
|
|
|
35
38
|
case "$TOOL" in
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
set -euo pipefail
|
|
3
|
+
|
|
4
|
+
# Eval-Core Batch Save on Session End (Advisory Only)
|
|
5
|
+
# Trigger: Stop hook
|
|
6
|
+
# Purpose: Auto-collect eval metrics on session end via eval-core CLI
|
|
7
|
+
# Protocol: stdin JSON -> process -> stdout pass-through, exit 0 always
|
|
8
|
+
#
|
|
9
|
+
# This hook is advisory-only and never blocks session termination.
|
|
10
|
+
# If eval-core is unavailable or collection fails, the session continues normally.
|
|
11
|
+
|
|
12
|
+
input=$(cat)
|
|
13
|
+
PPID_FILE="/tmp/.claude-task-outcomes-${PPID}"
|
|
14
|
+
|
|
15
|
+
# Only attempt collection if outcome file exists and eval-core is available
|
|
16
|
+
if [ -f "$PPID_FILE" ] && command -v eval-core >/dev/null 2>&1; then
|
|
17
|
+
echo "[Hook] Collecting eval metrics via eval-core..." >&2
|
|
18
|
+
eval-core collect --ppid "$PPID" 2>/dev/null || true
|
|
19
|
+
fi
|
|
20
|
+
|
|
21
|
+
# Always pass through input and exit 0 (advisory only)
|
|
22
|
+
echo "$input"
|
|
23
|
+
exit 0
|
|
@@ -3,6 +3,9 @@
|
|
|
3
3
|
# Warns when git operations are delegated to a non-mgr-gitnerd agent via Agent/Task tool.
|
|
4
4
|
# WARN only - does NOT block (exit 0, passes input through).
|
|
5
5
|
|
|
6
|
+
# Dependency check: exit silently if jq not available
|
|
7
|
+
command -v jq >/dev/null 2>&1 || exit 0
|
|
8
|
+
|
|
6
9
|
input=$(cat)
|
|
7
10
|
|
|
8
11
|
agent_type=$(echo "$input" | jq -r '.tool_input.subagent_type // ""')
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
#!/bin/bash
|
|
2
2
|
set -euo pipefail
|
|
3
3
|
|
|
4
|
+
# Dependency check: exit silently if jq not available
|
|
5
|
+
command -v jq >/dev/null 2>&1 || exit 0
|
|
6
|
+
|
|
4
7
|
# Model Escalation Advisor Hook
|
|
5
8
|
# Trigger: PreToolUse, tool == "Task" || tool == "Agent"
|
|
6
9
|
# Purpose: Advise model escalation when failure patterns detected
|
|
@@ -82,6 +82,20 @@ if command -v git >/dev/null 2>&1 && git rev-parse --is-inside-work-tree >/dev/n
|
|
|
82
82
|
fi
|
|
83
83
|
fi
|
|
84
84
|
|
|
85
|
+
# --- CI Status Check ---
|
|
86
|
+
# Check last CI run status if gh CLI is available
|
|
87
|
+
if command -v gh &>/dev/null; then
|
|
88
|
+
ci_status=$(gh run list --limit 1 --json conclusion -q '.[0].conclusion' 2>/dev/null || echo "unknown")
|
|
89
|
+
ci_name=$(gh run list --limit 1 --json name -q '.[0].name' 2>/dev/null || echo "unknown")
|
|
90
|
+
if [ "$ci_status" = "failure" ]; then
|
|
91
|
+
echo "[Session] ⚠ WARNING: Last CI run FAILED (${ci_name}) — check before pushing" >&2
|
|
92
|
+
elif [ "$ci_status" = "success" ]; then
|
|
93
|
+
echo "[Session] CI: last run passed (${ci_name})" >&2
|
|
94
|
+
elif [ "$ci_status" != "unknown" ]; then
|
|
95
|
+
echo "[Session] CI: last run status: ${ci_status} (${ci_name})" >&2
|
|
96
|
+
fi
|
|
97
|
+
fi
|
|
98
|
+
|
|
85
99
|
# Update availability check (local cache only — no network calls)
|
|
86
100
|
OMCUSTOM_UPDATE_STATUS="unknown"
|
|
87
101
|
INSTALLED_VERSION=""
|
|
@@ -173,6 +187,12 @@ case "$DRIFT_STATUS" in
|
|
|
173
187
|
esac
|
|
174
188
|
echo "------------------------------------" >&2
|
|
175
189
|
|
|
190
|
+
# SessionEnd hooks timeout (v2.1.74+)
|
|
191
|
+
if [ -z "${CLAUDE_CODE_SESSIONEND_HOOKS_TIMEOUT_MS:-}" ]; then
|
|
192
|
+
echo "[SessionEnv] ⚠ CLAUDE_CODE_SESSIONEND_HOOKS_TIMEOUT_MS not set (default: 1500ms)" >&2
|
|
193
|
+
echo "[SessionEnv] Recommend: export CLAUDE_CODE_SESSIONEND_HOOKS_TIMEOUT_MS=10000" >&2
|
|
194
|
+
fi
|
|
195
|
+
|
|
176
196
|
# Update Check report
|
|
177
197
|
echo "" >&2
|
|
178
198
|
echo " [Update Check]" >&2
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
#!/bin/bash
|
|
2
2
|
set -euo pipefail
|
|
3
3
|
|
|
4
|
+
# Dependency check: exit silently if jq not available
|
|
5
|
+
command -v jq >/dev/null 2>&1 || exit 0
|
|
6
|
+
|
|
4
7
|
# Stuck Detector Hook
|
|
5
8
|
# Trigger: PostToolUse, tool matches "Edit|Write|Bash|Task|Agent"
|
|
6
9
|
# Purpose: Detect repetitive failure loops and advise recovery
|
|
@@ -9,7 +12,7 @@ set -euo pipefail
|
|
|
9
12
|
# - exit 1: hard block (extreme stuck loops, >= HARD_BLOCK_THRESHOLD repetitions)
|
|
10
13
|
|
|
11
14
|
# Hard block threshold: consecutive identical operations before blocking
|
|
12
|
-
HARD_BLOCK_THRESHOLD
|
|
15
|
+
HARD_BLOCK_THRESHOLD=${CLAUDE_STUCK_THRESHOLD:-3}
|
|
13
16
|
|
|
14
17
|
input=$(cat)
|
|
15
18
|
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
#!/bin/bash
|
|
2
2
|
set -euo pipefail
|
|
3
3
|
|
|
4
|
+
# Dependency check: exit silently if jq not available
|
|
5
|
+
command -v jq >/dev/null 2>&1 || exit 0
|
|
6
|
+
|
|
4
7
|
# Task/Agent Outcome Recorder Hook
|
|
5
8
|
# Trigger: PostToolUse (tool == "Task" || "Agent") and SubagentStop
|
|
6
9
|
# Purpose: Record task outcomes for model escalation decisions
|
|
@@ -13,6 +16,17 @@ agent_type=$(echo "$input" | jq -r '.tool_input.subagent_type // .agent_type //
|
|
|
13
16
|
model=$(echo "$input" | jq -r '.tool_input.model // .model // "inherit"')
|
|
14
17
|
description=$(echo "$input" | jq -r '.tool_input.description // .description // ""' | head -c 80)
|
|
15
18
|
|
|
19
|
+
# Extract skill name from description or prompt
|
|
20
|
+
skill_name=""
|
|
21
|
+
if echo "$description" | grep -qiE '(skill:|routing|→.*skill)'; then
|
|
22
|
+
skill_name=$(echo "$description" | grep -oiE '[a-z]+-[a-z]+(-[a-z]+)*-?(routing|skill|practices|detection|decomposition|orchestration|pipeline|guards|cycle|plan|review|refactor|publish|version|audit|exec|analyze|bundle|report|setup|watch|lists|status|help|save|recall)' | head -1)
|
|
23
|
+
fi
|
|
24
|
+
# Fallback: check prompt field for "Skill: {name}" pattern
|
|
25
|
+
if [ -z "$skill_name" ]; then
|
|
26
|
+
prompt=$(echo "$input" | jq -r '.tool_input.prompt // ""' | head -c 500)
|
|
27
|
+
skill_name=$(echo "$prompt" | grep -oiE 'Skill:\s*[a-z]+-[a-z]+(-[a-z]+)*' | sed 's/[Ss]kill:\s*//' | head -1)
|
|
28
|
+
fi
|
|
29
|
+
|
|
16
30
|
# Determine outcome
|
|
17
31
|
is_error=$(echo "$input" | jq -r '.tool_output.is_error // false')
|
|
18
32
|
|
|
@@ -61,9 +75,10 @@ entry=$(jq -n \
|
|
|
61
75
|
--arg model "$model" \
|
|
62
76
|
--arg outcome "$outcome" \
|
|
63
77
|
--arg pattern "$pattern" \
|
|
78
|
+
--arg skill "$skill_name" \
|
|
64
79
|
--arg desc "$description" \
|
|
65
80
|
--arg err "$error_summary" \
|
|
66
|
-
'{timestamp: $ts, agent_type: $agent, model: $model, outcome: $outcome, pattern_used: $pattern, description: $desc, error_summary: $err}')
|
|
81
|
+
'{timestamp: $ts, agent_type: $agent, model: $model, outcome: $outcome, pattern_used: $pattern, skill: $skill, description: $desc, error_summary: $err}')
|
|
67
82
|
|
|
68
83
|
echo "$entry" >> "$OUTCOME_FILE"
|
|
69
84
|
|
|
@@ -27,3 +27,15 @@
|
|
|
27
27
|
| Repetitive tasks, clear bottleneck, measurable gain | One-time tasks, already fast, complexity > benefit |
|
|
28
28
|
|
|
29
29
|
Readability > Optimization. No optimization without measurement.
|
|
30
|
+
|
|
31
|
+
## CLAUDE.md Context Optimization (v2.1.72+)
|
|
32
|
+
|
|
33
|
+
HTML comments in CLAUDE.md are hidden from the model during auto-injection but remain visible via the Read tool.
|
|
34
|
+
|
|
35
|
+
| Use Case | Example |
|
|
36
|
+
|----------|---------|
|
|
37
|
+
| Metadata tags | `<!-- agents: 44, skills: 74 -->` |
|
|
38
|
+
| Validation checksums | `<!-- validate-docs: hash=abc123 -->` |
|
|
39
|
+
| Conditional context | `<!-- detailed-architecture: see guides/architecture/ -->` |
|
|
40
|
+
|
|
41
|
+
**Rule**: Move model-unnecessary metadata into HTML comments to reduce context token usage. Keep actionable instructions as visible text.
|
|
@@ -11,7 +11,7 @@ Location: `.claude/agents/{name}.md` (single file, kebab-case)
|
|
|
11
11
|
```yaml
|
|
12
12
|
name: agent-name # Unique identifier (kebab-case)
|
|
13
13
|
description: Brief desc # One-line summary
|
|
14
|
-
model: sonnet # sonnet | opus | haiku
|
|
14
|
+
model: sonnet # sonnet | opus | haiku (or full ID: claude-sonnet-4-6)
|
|
15
15
|
tools: [Read, Write, ...] # Allowed tools
|
|
16
16
|
```
|
|
17
17
|
|
|
@@ -31,9 +31,10 @@ escalation: # Model escalation policy (optional)
|
|
|
31
31
|
path: haiku → sonnet → opus # Escalation sequence
|
|
32
32
|
threshold: 2 # Failures before advisory
|
|
33
33
|
soul: true # Enable SOUL.md identity injection
|
|
34
|
-
isolation: worktree
|
|
34
|
+
isolation: worktree | sandbox # worktree = git worktree, sandbox = restricted bash
|
|
35
35
|
background: true # Run in background
|
|
36
36
|
maxTurns: 10 # Max conversation turns
|
|
37
|
+
maxTokens: 100000 # Per-turn token ceiling
|
|
37
38
|
mcpServers: [server-1] # MCP servers available
|
|
38
39
|
hooks: # Agent-specific hooks
|
|
39
40
|
PreToolUse:
|
|
@@ -41,9 +42,33 @@ hooks: # Agent-specific hooks
|
|
|
41
42
|
command: "echo hook"
|
|
42
43
|
permissionMode: bypassPermissions # Permission mode
|
|
43
44
|
disallowedTools: [Bash] # Tools to disallow
|
|
45
|
+
limitations: # Negative capability declarations
|
|
46
|
+
- "cannot execute tests"
|
|
47
|
+
- "cannot modify code"
|
|
48
|
+
domain: backend # backend | frontend | data-engineering | devops | universal
|
|
44
49
|
```
|
|
45
50
|
|
|
46
|
-
> **Note**: `isolation`, `background`, `maxTurns`, `mcpServers`, `hooks`, `permissionMode`, `disallowedTools` are supported in Claude Code v2.1.63+.
|
|
51
|
+
> **Note**: `isolation`, `background`, `maxTurns`, `maxTokens`, `mcpServers`, `hooks`, `permissionMode`, `disallowedTools`, `limitations` are supported in Claude Code v2.1.63+. Hook types `PostCompact`, `Elicitation`, `ElicitationResult` require v2.1.76+.
|
|
52
|
+
|
|
53
|
+
### Isolation Modes
|
|
54
|
+
|
|
55
|
+
| Mode | Behavior | Use Case |
|
|
56
|
+
|------|----------|----------|
|
|
57
|
+
| `worktree` | Isolated git worktree copy | Code changes that need rollback safety |
|
|
58
|
+
| `sandbox` | Restricted Bash environment | Agents running untrusted commands or scans |
|
|
59
|
+
|
|
60
|
+
When `isolation: sandbox` is set, the agent's Bash calls are expected to run with restricted permissions. Note that this field is advisory metadata — actual enforcement depends on the execution environment.
|
|
61
|
+
|
|
62
|
+
### Token Ceiling
|
|
63
|
+
|
|
64
|
+
When `maxTokens` is set, it serves as advisory metadata for the orchestrator to manage agent turn budgets. The orchestrator should track output and consider escalation or task splitting when an agent approaches its ceiling.
|
|
65
|
+
|
|
66
|
+
### Negative Capabilities (Limitations)
|
|
67
|
+
|
|
68
|
+
The `limitations` field declares what an agent explicitly CANNOT or SHOULD NOT do. This enables:
|
|
69
|
+
1. **Clearer routing**: Orchestrator knows agent boundaries
|
|
70
|
+
2. **Safer delegation**: Prevents accidental capability overreach
|
|
71
|
+
3. **Better documentation**: Makes agent scope explicit
|
|
47
72
|
|
|
48
73
|
### Escalation Policy
|
|
49
74
|
|
|
@@ -144,6 +169,19 @@ user-invocable: false # Whether user can invoke directly
|
|
|
144
169
|
disable-model-invocation: true # Prevent model from auto-invoking
|
|
145
170
|
```
|
|
146
171
|
|
|
172
|
+
### Skill Effectiveness Tracking
|
|
173
|
+
|
|
174
|
+
Skills can optionally track effectiveness metrics via auto-populated fields:
|
|
175
|
+
|
|
176
|
+
```yaml
|
|
177
|
+
effectiveness: # Auto-populated by sys-memory-keeper
|
|
178
|
+
invocations: 0 # Total invocation count across sessions
|
|
179
|
+
success_rate: 0.0 # Success rate (0.0-1.0)
|
|
180
|
+
last_invoked: "" # ISO-8601 timestamp
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
These fields are read-only from the skill's perspective — sys-memory-keeper updates them at session end based on task-outcome-recorder data. They inform model selection, routing optimization, and skill maintenance priorities.
|
|
184
|
+
|
|
147
185
|
## Skill Scope
|
|
148
186
|
|
|
149
187
|
| Scope | Purpose | Deployed via init? |
|
|
@@ -156,7 +194,7 @@ Default: `core` (when field is omitted)
|
|
|
156
194
|
|
|
157
195
|
### Context Fork Criteria
|
|
158
196
|
|
|
159
|
-
Use `context: fork` for skills that orchestrate multi-agent workflows. Cap at **
|
|
197
|
+
Use `context: fork` for skills that orchestrate multi-agent workflows. Cap at **12 total** across the project.
|
|
160
198
|
|
|
161
199
|
| Use `context: fork` | Do NOT use `context: fork` |
|
|
162
200
|
|---------------------|---------------------------|
|
|
@@ -165,10 +203,10 @@ Use `context: fork` for skills that orchestrate multi-agent workflows. Cap at **
|
|
|
165
203
|
| Multi-agent coordination patterns | Single-agent reference skills |
|
|
166
204
|
| Task decomposition/planning | External tool integrations |
|
|
167
205
|
|
|
168
|
-
Current skills with `context: fork` (
|
|
206
|
+
Current skills with `context: fork` (11/12 cap):
|
|
169
207
|
- secretary-routing, dev-lead-routing, de-lead-routing, qa-lead-routing
|
|
170
|
-
- dag-orchestration, task-decomposition, worker-reviewer-pipeline
|
|
171
|
-
-
|
|
208
|
+
- dag-orchestration, task-decomposition, worker-reviewer-pipeline, pipeline-guards
|
|
209
|
+
- deep-plan, evaluator-optimizer, sauron-watch
|
|
172
210
|
|
|
173
211
|
## Naming
|
|
174
212
|
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# [MUST] Completion Verification Rules
|
|
2
|
+
|
|
3
|
+
> **Priority**: MUST | **ID**: R020
|
|
4
|
+
|
|
5
|
+
## Core Rule
|
|
6
|
+
|
|
7
|
+
Before declaring any task `[Done]`, verify completion against task-type-specific criteria. False completion declarations erode trust and cause downstream failures.
|
|
8
|
+
|
|
9
|
+
## Task-Type Completion Matrix
|
|
10
|
+
|
|
11
|
+
| Task Type | REQUIRED Verification Before [Done] |
|
|
12
|
+
|-----------|-------------------------------------|
|
|
13
|
+
| Release | All issues closed, version bumped, PR merged, GitHub Release created |
|
|
14
|
+
| Implementation | Code compiles/passes lint, tests pass (if any exist), no TODO markers left |
|
|
15
|
+
| Documentation | Links valid, counts accurate, cross-references updated |
|
|
16
|
+
| Git Operations | Operation succeeded (check exit code), working tree clean |
|
|
17
|
+
| Code Review | All findings addressed or explicitly deferred with justification |
|
|
18
|
+
| Agent/Skill Creation | Frontmatter valid, referenced skills exist, routing updated |
|
|
19
|
+
|
|
20
|
+
## Self-Check (Before Declaring Done)
|
|
21
|
+
|
|
22
|
+
```
|
|
23
|
+
╔══════════════════════════════════════════════════════════════════╗
|
|
24
|
+
║ BEFORE DECLARING [Done], ASK YOURSELF: ║
|
|
25
|
+
║ ║
|
|
26
|
+
║ 1. Did I verify the ACTUAL outcome (not just attempt)? ║
|
|
27
|
+
║ "I ran the command" ≠ "the command succeeded" ║
|
|
28
|
+
║ YES → Continue ║
|
|
29
|
+
║ NO → Verify outcome first ║
|
|
30
|
+
║ ║
|
|
31
|
+
║ 2. Does the task type have specific criteria above? ║
|
|
32
|
+
║ YES → Check each criterion ║
|
|
33
|
+
║ NO → Apply general verification ║
|
|
34
|
+
║ ║
|
|
35
|
+
║ 3. Are there any unchecked items in the task's checklist? ║
|
|
36
|
+
║ YES → Complete them or explicitly defer with reason ║
|
|
37
|
+
║ NO → Good. Proceed to [Done] ║
|
|
38
|
+
║ ║
|
|
39
|
+
║ 4. Would I bet $100 this task is truly complete? ║
|
|
40
|
+
║ YES → Declare [Done] ║
|
|
41
|
+
║ NO → Identify what's uncertain and verify ║
|
|
42
|
+
╚══════════════════════════════════════════════════════════════════╝
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Common False Completion Patterns
|
|
46
|
+
|
|
47
|
+
| Pattern | Reality | Fix |
|
|
48
|
+
|---------|---------|-----|
|
|
49
|
+
| "Command executed" | Exit code not checked | Check `$?` or tool output |
|
|
50
|
+
| "File created" | Content not verified | Read file back, verify content |
|
|
51
|
+
| "PR created" | CI not checked | Wait for CI, verify green |
|
|
52
|
+
| "Issue closed" | Related issues not updated | Check parent epic, cross-refs |
|
|
53
|
+
| "Tests pass" | Only ran subset | Run full test suite |
|
|
54
|
+
|
|
55
|
+
## Completion Contract Format
|
|
56
|
+
|
|
57
|
+
For complex tasks, declare a completion contract upfront:
|
|
58
|
+
|
|
59
|
+
```
|
|
60
|
+
[Contract] Task: {name}
|
|
61
|
+
├── Criterion 1: {specific, verifiable condition}
|
|
62
|
+
├── Criterion 2: {specific, verifiable condition}
|
|
63
|
+
└── Criterion N: {specific, verifiable condition}
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Then at completion:
|
|
67
|
+
|
|
68
|
+
```
|
|
69
|
+
[Done] Task: {name}
|
|
70
|
+
├── ✓ Criterion 1: {evidence}
|
|
71
|
+
├── ✓ Criterion 2: {evidence}
|
|
72
|
+
└── ✓ Criterion N: {evidence}
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Integration
|
|
76
|
+
|
|
77
|
+
| Rule | Interaction |
|
|
78
|
+
|------|-------------|
|
|
79
|
+
| R003 | [Done] status format now requires verification evidence |
|
|
80
|
+
| R010 | Orchestrator verifies subagent completion claims |
|
|
81
|
+
| R017 | Structural changes require sauron verification before [Done] |
|
|
@@ -14,6 +14,7 @@ Rule: If native auto memory can handle it, do NOT use claude-mem.
|
|
|
14
14
|
Agent frontmatter `memory: project|user|local` enables persistent memory:
|
|
15
15
|
- System creates memory directory, loads first 200 lines of MEMORY.md into prompt
|
|
16
16
|
- Read/Write/Edit tools auto-enabled for memory directory
|
|
17
|
+
- Custom directory: set `autoMemoryDirectory` in settings to override default paths (v2.1.74+)
|
|
17
18
|
|
|
18
19
|
| Scope | Location | Git Tracked |
|
|
19
20
|
|-------|----------|-------------|
|
|
@@ -71,6 +72,45 @@ Memory entries in MEMORY.md should include confidence annotations to distinguish
|
|
|
71
72
|
[any] → contradicted by evidence → demoted or removed
|
|
72
73
|
```
|
|
73
74
|
|
|
75
|
+
### Temporal Decay
|
|
76
|
+
|
|
77
|
+
Memory entries include an optional verification timestamp for decay tracking:
|
|
78
|
+
|
|
79
|
+
**Format**: `[confidence: high, verified: 2026-03-15]`
|
|
80
|
+
|
|
81
|
+
| Age (unverified) | Action |
|
|
82
|
+
|-------------------|--------|
|
|
83
|
+
| 0-30 days | No change — entry is fresh |
|
|
84
|
+
| 31-60 days | Demote one level (high→medium, medium→low) |
|
|
85
|
+
| 61-90 days | Demote again if not re-verified |
|
|
86
|
+
| 91+ days | Removal candidate — flag for review |
|
|
87
|
+
|
|
88
|
+
**Decay Schedule**:
|
|
89
|
+
```
|
|
90
|
+
Day 0: [confidence: high, verified: 2026-03-15]
|
|
91
|
+
Day 30: [confidence: high, verified: 2026-03-15] ← still within window
|
|
92
|
+
Day 31: [confidence: medium, verified: 2026-03-15] ← auto-demoted
|
|
93
|
+
Day 61: [confidence: low, verified: 2026-03-15] ← demoted again
|
|
94
|
+
Day 91: [REVIEW NEEDED, verified: 2026-03-15] ← flagged
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
**Re-verification**: Any session that confirms a memory entry resets the verified date and can promote its confidence, as in this example:
|
|
98
|
+
```
|
|
99
|
+
Before: [confidence: medium, verified: 2026-01-15]
|
|
100
|
+
Action: Pattern confirmed in session
|
|
101
|
+
After: [confidence: high, verified: 2026-03-15]
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
**Enforcement**: sys-memory-keeper checks decay at session start and end:
|
|
105
|
+
1. Session start: scan MEMORY.md for entries past decay threshold
|
|
106
|
+
2. Flag stale entries with `[STALE]` prefix
|
|
107
|
+
3. Session end: remove or demote unconfirmed stale entries
|
|
108
|
+
|
|
109
|
+
**Exceptions**: Entries marked `[permanent]` are exempt from decay:
|
|
110
|
+
```
|
|
111
|
+
### Architecture Decisions [confidence: high, permanent]
|
|
112
|
+
```
|
|
113
|
+
|
|
74
114
|
## Behavioral Memory
|
|
75
115
|
|
|
76
116
|
MEMORY.md supports an optional `## Behaviors` section for tracking user interaction preferences and workflow patterns.
|
|
@@ -140,6 +180,47 @@ When sys-memory-keeper updates MEMORY.md at session end:
|
|
|
140
180
|
2. Findings that match existing entries → promote confidence
|
|
141
181
|
3. Findings that contradict existing entries → flag for review
|
|
142
182
|
|
|
183
|
+
## Agent Metrics
|
|
184
|
+
|
|
185
|
+
MEMORY.md supports an optional `## Metrics` section for tracking per-agent-type performance data.
|
|
186
|
+
|
|
187
|
+
### Metrics Section Format
|
|
188
|
+
|
|
189
|
+
```markdown
|
|
190
|
+
## Metrics [auto-updated by sys-memory-keeper]
|
|
191
|
+
|
|
192
|
+
| Agent Type | Tasks | Success Rate | Avg Model | Last Used |
|
|
193
|
+
|------------|-------|-------------|-----------|-----------|
|
|
194
|
+
| lang-golang-expert | 12 | 92% | sonnet | 2026-03-15 |
|
|
195
|
+
| mgr-gitnerd | 8 | 100% | sonnet | 2026-03-15 |
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
### Metrics Collection
|
|
199
|
+
|
|
200
|
+
sys-memory-keeper aggregates metrics at session end:
|
|
201
|
+
|
|
202
|
+
1. Read `/tmp/.claude-task-outcomes-${PPID}` (JSONL from task-outcome-recorder hook)
|
|
203
|
+
2. Parse each entry: `{agent_type, outcome, model, timestamp}`
|
|
204
|
+
3. Aggregate by agent_type: total tasks, success count, model distribution
|
|
205
|
+
4. Merge with existing Metrics table in MEMORY.md
|
|
206
|
+
5. Budget: max 20 rows (prune lowest-usage agents when exceeded)
|
|
207
|
+
|
|
208
|
+
### Metrics Fields
|
|
209
|
+
|
|
210
|
+
| Field | Source | Calculation |
|
|
211
|
+
|-------|--------|-------------|
|
|
212
|
+
| Tasks | task-outcome-recorder JSONL | Count of entries per agent_type |
|
|
213
|
+
| Success Rate | outcome field | `success_count / total_count * 100` |
|
|
214
|
+
| Avg Model | model field | Most frequently used model |
|
|
215
|
+
| Last Used | timestamp field | Most recent invocation |
|
|
216
|
+
|
|
217
|
+
### Budget Management
|
|
218
|
+
|
|
219
|
+
The Metrics section shares the 200-line MEMORY.md budget:
|
|
220
|
+
1. Max 20 agent rows in Metrics table
|
|
221
|
+
2. When adding new agent, prune agent with lowest task count
|
|
222
|
+
3. Merge identical agent types across sessions (cumulative)
|
|
223
|
+
|
|
143
224
|
## Session-End Auto-Save
|
|
144
225
|
|
|
145
226
|
### Trigger
|