@doidor/agentrig 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +224 -0
- package/dist/agent/claude.js +125 -0
- package/dist/agent/claude.js.map +1 -0
- package/dist/agent/copilot.js +147 -0
- package/dist/agent/copilot.js.map +1 -0
- package/dist/agent/index.js +17 -0
- package/dist/agent/index.js.map +1 -0
- package/dist/agent/provider.js +10 -0
- package/dist/agent/provider.js.map +1 -0
- package/dist/cli.js +169 -0
- package/dist/cli.js.map +1 -0
- package/dist/commands/compile.js +42 -0
- package/dist/commands/compile.js.map +1 -0
- package/dist/commands/dashboard.js +35 -0
- package/dist/commands/dashboard.js.map +1 -0
- package/dist/commands/doctor.js +40 -0
- package/dist/commands/doctor.js.map +1 -0
- package/dist/commands/eval.js +178 -0
- package/dist/commands/eval.js.map +1 -0
- package/dist/commands/init.js +100 -0
- package/dist/commands/init.js.map +1 -0
- package/dist/commands/update.js +176 -0
- package/dist/commands/update.js.map +1 -0
- package/dist/core/activity.js +80 -0
- package/dist/core/activity.js.map +1 -0
- package/dist/core/audit.js +112 -0
- package/dist/core/audit.js.map +1 -0
- package/dist/core/compile.js +250 -0
- package/dist/core/compile.js.map +1 -0
- package/dist/core/fsutil.js +45 -0
- package/dist/core/fsutil.js.map +1 -0
- package/dist/core/install.js +97 -0
- package/dist/core/install.js.map +1 -0
- package/dist/core/knowledge.js +34 -0
- package/dist/core/knowledge.js.map +1 -0
- package/dist/core/logger.js +31 -0
- package/dist/core/logger.js.map +1 -0
- package/dist/core/paths.js +22 -0
- package/dist/core/paths.js.map +1 -0
- package/dist/core/setupsteps.js +72 -0
- package/dist/core/setupsteps.js.map +1 -0
- package/dist/core/state.js +19 -0
- package/dist/core/state.js.map +1 -0
- package/dist/core/surfaces.js +62 -0
- package/dist/core/surfaces.js.map +1 -0
- package/dist/prompts/index.js +117 -0
- package/dist/prompts/index.js.map +1 -0
- package/dist/version.js +26 -0
- package/dist/version.js.map +1 -0
- package/knowledge/PRINCIPLES.md +106 -0
- package/knowledge/manifest.json +247 -0
- package/knowledge/templates/AGENTS.md +66 -0
- package/knowledge/templates/AGENTS.package.example.md +19 -0
- package/knowledge/templates/agents/README.md +33 -0
- package/knowledge/templates/agents/developer.md +7 -0
- package/knowledge/templates/agents/developer.yml +7 -0
- package/knowledge/templates/agents/judge.md +6 -0
- package/knowledge/templates/agents/judge.yml +6 -0
- package/knowledge/templates/agents/reviewer.md +6 -0
- package/knowledge/templates/agents/reviewer.yml +7 -0
- package/knowledge/templates/agents/triager.md +8 -0
- package/knowledge/templates/agents/triager.yml +8 -0
- package/knowledge/templates/dashboard/dashboard.mjs +261 -0
- package/knowledge/templates/eval/RUBRIC.md +94 -0
- package/knowledge/templates/eval/axes.json +56 -0
- package/knowledge/templates/eval/checks.json +304 -0
- package/knowledge/templates/eval/sandbox/eval-rules.md +23 -0
- package/knowledge/templates/eval/scenarios/README.md +24 -0
- package/knowledge/templates/eval/scenarios/add-small-feature.md +28 -0
- package/knowledge/templates/eval/scenarios/fix-failing-test.md +27 -0
- package/knowledge/templates/eval/scenarios/review-catches-bug.md +30 -0
- package/knowledge/templates/eval/score.mjs +257 -0
- package/knowledge/templates/eval/static-audit.mjs +112 -0
- package/knowledge/templates/harness/ORCHESTRATION.md +53 -0
- package/knowledge/templates/harness/state-machine.yml +105 -0
- package/knowledge/templates/mcp/mcp.json +12 -0
- package/knowledge/templates/rules/README.md +32 -0
- package/knowledge/templates/rules/code-review.md +26 -0
- package/knowledge/templates/rules/coding-standards.md +15 -0
- package/knowledge/templates/rules/no-debug-logging.md +16 -0
- package/knowledge/templates/rules/security.md +23 -0
- package/knowledge/templates/scripts/repair-worktrees.sh +124 -0
- package/knowledge/templates/skills/fix-ci/SKILL.md +17 -0
- package/knowledge/templates/skills/harness-eval/SKILL.md +83 -0
- package/knowledge/templates/skills/self-verify/SKILL.md +25 -0
- package/knowledge/templates/skills/skill-authoring/SKILL.md +35 -0
- package/knowledge/templates/skills/skill-improver/SKILL.md +23 -0
- package/knowledge/templates/skills/verify-loop/SKILL.md +35 -0
- package/knowledge/templates/wiki/README.md +23 -0
- package/knowledge/templates/wiki/_TEMPLATE.md +16 -0
- package/knowledge/templates/wiki/index.md +29 -0
- package/knowledge/templates/wiki/troubleshooting.md +14 -0
- package/package.json +70 -0
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Hermetic per-agent worktrees (principle 7) + safe crash recovery.
|
|
3
|
+
# Adapted from epichan's repair_agent_worktrees.py.
|
|
4
|
+
#
|
|
5
|
+
# Usage:
|
|
6
|
+
# repair-worktrees.sh add <agent-id> [branch] Create/reuse an isolated worktree, print its path
|
|
7
|
+
# repair-worktrees.sh repair [--apply] Prune stale metadata + safely recover worktrees
|
|
8
|
+
# (dry-run unless --apply)
|
|
9
|
+
# repair-worktrees.sh Same as `repair` (dry-run)
|
|
10
|
+
#
|
|
11
|
+
# Safety rules for `repair` (so we never corrupt a live agent session):
|
|
12
|
+
# - Stale .git/index.lock / HEAD.lock are removed ONLY if older than LOCK_MIN_AGE_SECONDS AND not
|
|
13
|
+
# currently open (checked via lsof when available).
|
|
14
|
+
# - A worktree with open files (lsof) is treated as ACTIVE and skipped entirely.
|
|
15
|
+
# - Before any `git reset --hard` / `git clean -fd`, dirty files are ARCHIVED to
|
|
16
|
+
# ~/.agentrig/worktree-archives/<timestamp>/<agent>/ so interrupted work is recoverable.
|
|
17
|
+
# Strict mode: stop on a failing command, an unset variable, or a failing pipeline stage.
set -euo pipefail

# Top-level directory of the git repository the script is invoked from.
REPO_ROOT="$(git rev-parse --show-toplevel)"

# Tunables. Each one can be overridden through the AGENTRIG_* environment variable it reads.
LOCK_MIN_AGE_SECONDS="${AGENTRIG_LOCK_MIN_AGE_SECONDS:-120}"
ARCHIVE_BASE="${AGENTRIG_WORKTREE_ARCHIVE:-$HOME/.agentrig/worktree-archives}"
WORKTREE_BASE="${AGENTRIG_WORKTREE_BASE:-$HOME/.agentrig/worktrees/$(basename "$REPO_ROOT")}"

# Lock file names that `repair` inspects inside each worktree's git directory.
LOCK_NAMES=("index.lock" "HEAD.lock")

# Make sure the per-repository worktree directory exists before any subcommand runs.
mkdir -p "$WORKTREE_BASE"
|
|
26
|
+
|
|
27
|
+
path_has_open_files() {
  # Exit status 0 when lsof reports an open handle on "$1" (searched recursively
  # with +D when "$1" is a directory). When lsof is not installed the answer is
  # "no" (status 1); otherwise the status is whatever lsof returned.
  local probe_path="$1"
  if ! command -v lsof >/dev/null 2>&1; then
    return 1
  fi
  local -a lsof_args=("$probe_path")
  [ -d "$probe_path" ] && lsof_args=(+D "$probe_path")
  lsof "${lsof_args[@]}" >/dev/null 2>&1
}
|
|
37
|
+
|
|
38
|
+
file_age_seconds() {
  # Prints how many seconds ago "$1" was last modified. The mtime is read with
  # GNU stat (-c %Y) first and BSD/macOS stat (-f %m) second; if neither works
  # the current time is used, so the printed age is 0.
  local target="$1" current modified
  current="$(date +%s)"
  modified="$(
    { stat -c %Y "$target" || stat -f %m "$target" || printf '%s\n' "$current"; } 2>/dev/null
  )"
  printf '%s\n' "$(( current - modified ))"
}
|
|
45
|
+
|
|
46
|
+
cmd_add() {
  # Create (or reuse) the isolated worktree for one agent and print its path.
  #
  # Arguments: $1 - agent id (required); becomes the directory name under WORKTREE_BASE
  #            $2 - branch name (optional; defaults to agentrig/<agent-id>)
  # Globals:   REPO_ROOT, WORKTREE_BASE (read)
  # Outputs:   the worktree path on stdout; progress messages on stderr
  local agent_id="${1:?usage: repair-worktrees.sh add <agent-id> [branch]}"
  local branch="${2:-agentrig/$agent_id}"
  # Prune stale metadata BEFORE every add (the classic "worktree add refuses" crash).
  git -C "$REPO_ROOT" worktree prune --expire now
  local dir="$WORKTREE_BASE/$agent_id"

  # Capture the listing first instead of piping git into `grep -q`: under
  # `pipefail`, grep exiting early can kill git with SIGPIPE and turn a real
  # match into a "not found".
  local listing
  listing="$(git -C "$REPO_ROOT" worktree list --porcelain)"

  # git may print the physical path of a worktree, so also compare against the
  # symlink-resolved form of $dir when the directory already exists.
  local real_dir="$dir"
  if [ -d "$dir" ]; then
    real_dir="$(cd "$dir" 2>/dev/null && pwd -P)" || real_dir="$dir"
  fi

  # -F -x: whole-line, literal comparison. The path routinely contains regex
  # metacharacters (e.g. the "." in ".agentrig"), so it must not be a pattern.
  if grep -Fxq -e "worktree $dir" -e "worktree $real_dir" <<<"$listing"; then
    echo "Reusing worktree: $dir" >&2
  else
    git -C "$REPO_ROOT" worktree add -B "$branch" "$dir" >/dev/null
    echo "Created worktree: $dir (branch $branch)" >&2
  fi
  echo "$dir"
}
|
|
60
|
+
|
|
61
|
+
_repair_archive_dirty() {
  # Copy every dirty regular file of worktree $1 into archive directory $2,
  # keeping relative paths. Returns non-zero if the archive directory cannot be
  # created or any copy fails, so the caller can refuse to discard work.
  local src="$1" dest="$2" entry status path _rename_src failed=0
  mkdir -p "$dest" || return 1
  # --untracked-files=all lists each untracked file on its own line; the default
  # collapses an untracked directory into "?? dir/", which would be skipped here
  # and then deleted by `git clean -fd`.
  while IFS= read -r -d '' entry; do
    status="${entry:0:2}"
    path="${entry:3}"
    case "$status" in
      R?|?R|C?|?C)
        # With -z, a rename/copy entry is followed by a second NUL-terminated
        # record holding the source path; consume it so it is not mistaken for
        # a status line.
        IFS= read -r -d '' _rename_src || true
        ;;
    esac
    # Deleted files and non-regular entries have nothing to copy.
    [ -f "$src/$path" ] || continue
    if ! { mkdir -p "$dest/$(dirname -- "$path")" && cp -p "$src/$path" "$dest/$path"; }; then
      echo "[archive-error] could not archive $path" >&2
      failed=1
    fi
  done < <(git -C "$src" status --porcelain -z --untracked-files=all)
  return "$failed"
}

cmd_repair() {
  # Prune stale worktree metadata, remove stale lock files, and reset dirty
  # worktrees under WORKTREE_BASE after archiving their dirty files.
  #
  # Arguments: $1 - "--apply" to make changes; anything else means dry-run
  # Globals:   REPO_ROOT, WORKTREE_BASE, ARCHIVE_BASE, LOCK_MIN_AGE_SECONDS,
  #            LOCK_NAMES (read)
  # Outputs:   one status line per worktree/lock on stdout; notices on stderr
  local apply="${1:-}"
  local do_apply=0
  [ "$apply" = "--apply" ] && do_apply=1

  echo "Pruning stale worktree metadata…" >&2
  # Best-effort in both modes; git reports its own errors on stderr.
  if [ "$do_apply" -eq 1 ]; then
    git -C "$REPO_ROOT" worktree prune --expire now || true
  else
    git -C "$REPO_ROOT" worktree prune --expire now --dry-run || true
  fi

  [ -d "$WORKTREE_BASE" ] || { echo "No worktrees under $WORKTREE_BASE"; return 0; }

  local ts; ts="$(date +%Y%m%d-%H%M%S)"
  local dir agent gitdir lock lock_path archive
  for dir in "$WORKTREE_BASE"/*/; do
    [ -d "$dir" ] || continue   # the unmatched glob stays literal when the base is empty
    dir="${dir%/}"
    agent="$(basename "$dir")"

    if path_has_open_files "$dir"; then
      echo "[skip-active] $agent (open files present)"
      continue
    fi

    # Locate the git directory. A linked worktree has a `.git` FILE that points
    # at it; that pointer may be relative to the worktree, so anchor it there
    # rather than to the current directory.
    gitdir="$dir/.git"
    if [ -f "$dir/.git" ]; then
      gitdir="$(sed -n 's/^gitdir: //p' "$dir/.git")"
      case "$gitdir" in
        /*) ;;
        ?*) gitdir="$dir/$gitdir" ;;
      esac
    fi

    # Remove stale, unopened lock files only. An empty gitdir (unparseable
    # `.git` file) is skipped so we never probe "/index.lock".
    if [ -n "$gitdir" ]; then
      for lock in "${LOCK_NAMES[@]}"; do
        lock_path="$gitdir/$lock"
        [ -f "$lock_path" ] || continue
        if path_has_open_files "$lock_path"; then
          echo "[skip-active-lock] $agent/$lock"
        elif [ "$(file_age_seconds "$lock_path")" -ge "$LOCK_MIN_AGE_SECONDS" ]; then
          echo "[remove-lock] $agent/$lock"
          if [ "$do_apply" -eq 1 ]; then
            rm -f "$lock_path"
          fi
        else
          echo "[skip-young-lock] $agent/$lock"
        fi
      done
    fi

    # Archive dirty files before any reset/clean.
    if [ -n "$(git -C "$dir" status --porcelain 2>/dev/null)" ]; then
      archive="$ARCHIVE_BASE/$ts/$agent"
      echo "[archive+reset] $agent (dirty) -> $archive"
      if [ "$do_apply" -eq 1 ]; then
        if _repair_archive_dirty "$dir" "$archive"; then
          git -C "$dir" reset --hard >/dev/null 2>&1 || true
          git -C "$dir" clean -fd >/dev/null 2>&1 || true
        else
          # Never discard work that could not be archived.
          echo "[archive-failed] $agent (worktree left untouched)" >&2
        fi
      fi
    else
      echo "[clean] $agent"
    fi
  done
  [ "$do_apply" -eq 1 ] || echo "(dry-run — re-run with --apply to make changes)"
}
|
|
119
|
+
|
|
120
|
+
# Subcommand dispatch. With no arguments the selector defaults to "repair".
case "${1:-repair}" in
  add)
    shift
    cmd_add "$@"
    ;;
  repair)
    # This arm is also reached when NO arguments were given. A bare `shift`
    # returns non-zero in that case and would abort the script under `set -e`
    # before the dry-run repair ever ran.
    if [ "$#" -gt 0 ]; then
      shift
    fi
    cmd_repair "${1:-}"
    ;;
  *)
    # Anything else (e.g. a bare `--apply`) is passed straight to repair.
    cmd_repair "${1:-}"
    ;;
esac
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: fix-ci
|
|
3
|
+
description: Diagnose and fix a failing CI run for the current branch, then re-verify.
|
|
4
|
+
triggers:
|
|
5
|
+
- check_suite.completed.failure
|
|
6
|
+
- "user asks to fix CI / a red build"
|
|
7
|
+
allowed-tools: Bash Read Grep Glob
|
|
8
|
+
argument-hint: "[run-url|run-id]"
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# fix-ci (principles 5, 8)
|
|
12
|
+
|
|
13
|
+
1. Fetch the failing job logs (prefer `gh run view --log-failed`).
|
|
14
|
+
2. Reproduce locally with the smallest command that fails.
|
|
15
|
+
3. Fix the root cause — not the symptom. Avoid disabling tests to go green.
|
|
16
|
+
4. Re-run `self-verify`. Iterate up to 3 times; if it is still red after that, self-park.
|
|
17
|
+
5. If a rule or skill should have prevented this failure, run `skill-improver`.
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: harness-eval
|
|
3
|
+
description: Evaluate THIS repository's agent harness — a deterministic structure audit plus an independent, rubric-driven dynamic eval (run/spec/review) with A/B variant comparison.
|
|
4
|
+
triggers:
|
|
5
|
+
- "evaluate the harness"
|
|
6
|
+
- pre_merge hook
|
|
7
|
+
- "did my harness change make things better or worse?"
|
|
8
|
+
allowed-tools: Bash Read Grep Glob
|
|
9
|
+
argument-hint: "[--static|--dynamic] [--scenario id] [--variant v]"
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
# harness-eval (principle 6 — evaluate the harness itself)
|
|
13
|
+
|
|
14
|
+
A harness you cannot measure is a harness you cannot improve. This skill scores the harness on two
|
|
15
|
+
complementary layers and writes results to `.agentrig/eval/results/` (validated, never hand-edited).
|
|
16
|
+
|
|
17
|
+
## Layer A — static audit (deterministic, no model)
|
|
18
|
+
Each of the 12 principles maps to concrete checks in `.agentrig/eval/checks.json`, scored 0/0.5/1.0.
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
node .agentrig/eval/static-audit.mjs # human-readable report + aggregate score
|
|
22
|
+
node .agentrig/eval/static-audit.mjs --json # machine-readable, for CI gates
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Use this in CI and as a fast pre-merge gate. It needs no model and no network.
|
|
26
|
+
|
|
27
|
+
## Layer B — dynamic behavioral eval (agentic, independent judge)
|
|
28
|
+
Run scenarios in `.agentrig/eval/scenarios/*.md` through the harness, then score as an **independent
|
|
29
|
+
judge** (a different model than the producer) against `.agentrig/eval/RUBRIC.md` and the registry in
|
|
30
|
+
`.agentrig/eval/axes.json`.
|
|
31
|
+
|
|
32
|
+
**Sandbox:** obey `.agentrig/eval/sandbox/eval-rules.md` — work in a throwaway worktree; never push,
|
|
33
|
+
open PRs, or merge.
|
|
34
|
+
|
|
35
|
+
**Lifecycle:** score the whole lifecycle, not just the patch. Use the rubric `--type` that matches
|
|
36
|
+
the scenario: `spec` (task quality), `run` (implementation), `review` (the reviewer's behavior).
|
|
37
|
+
Link them with a shared `--task` id.
|
|
38
|
+
|
|
39
|
+
**Rules (enforced by score.mjs):** strict 0/0.5/1.0 tiers; any axis < 1.0 needs an issue code from
|
|
40
|
+
that axis's registry **plus** an evidence string; unobserved axes are `=na`; rollups are recomputed
|
|
41
|
+
from axis data.
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
node .agentrig/eval/score.mjs save --type run --task <id> --scenario <id> --judge <model> \
|
|
45
|
+
--axis 'correctness=1.0' \
|
|
46
|
+
--axis 'scope=0.5:OQ-SCOPE-CHURN:left build artifacts in the diff' \
|
|
47
|
+
--axis 'tests=na'
|
|
48
|
+
node .agentrig/eval/score.mjs report
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
**Artifacts:** for each run, save `diff.patch`, a short `output` transcript, and `meta.json`
|
|
52
|
+
(scenario, base_commit, variant, model, duration) next to the score so regressions are inspectable.
|
|
53
|
+
|
|
54
|
+
## Comparing harness changes (A/B)
|
|
55
|
+
To know whether a prompt/skill/rule change helped, run the **same** scenario before and after under
|
|
56
|
+
different `--variant`s, then:
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
node .agentrig/eval/score.mjs compare --scenario <id>
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
A change that lowers the aggregate is a regression even if it "feels" better. A static score < 1.0
|
|
63
|
+
on a principle points at a missing/weak artifact — fix the artifact, then re-audit.
|
|
64
|
+
|
|
65
|
+
## Does the harness actually help? (with vs without)
|
|
66
|
+
The most important question for a consumer: *does installing AgentRig's harness make agents better
|
|
67
|
+
in THIS repo?* Measure it by running the same scenarios twice and comparing:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
# 1) Harness ON (the agent uses AGENTS.md + rules + skills as installed)
|
|
71
|
+
agentrig eval --dynamic --scenario <id> --variant harness
|
|
72
|
+
|
|
73
|
+
# 2) Baseline — harness OFF (a bare agent; ignore AGENTS.md/.agents/instructions surfaces)
|
|
74
|
+
agentrig eval --dynamic --scenario <id> --variant baseline
|
|
75
|
+
|
|
76
|
+
# 3) Report the lift (per-axis + aggregate delta + a HELPS/HURTS verdict)
|
|
77
|
+
node .agentrig/eval/score.mjs compare --scenario <id> --baseline baseline
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
For a rigorous baseline, run the harness-off trial in a sandbox/worktree with the harness + compiled
|
|
81
|
+
surfaces moved aside (`AGENTS.md`, `.agents/`, `.github/instructions/`, `CLAUDE.md`, `.cursor/`), so
|
|
82
|
+
the agent genuinely has no harness guidance. A positive aggregate delta means the harness helps in
|
|
83
|
+
this repo; track it over time as you tune rules/skills/prompts.
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: self-verify
|
|
3
|
+
description: Run the project's own build/test/lint and converge before handing work to a reviewer.
|
|
4
|
+
triggers:
|
|
5
|
+
- before requesting review
|
|
6
|
+
- before opening a PR
|
|
7
|
+
allowed-tools: Bash Read Grep Glob
|
|
8
|
+
argument-hint: "[--max-iterations N]"
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# self-verify (principle 5)
|
|
12
|
+
|
|
13
|
+
After producing changes, **verify your own work before handoff**. Do not invoke the reviewer until
|
|
14
|
+
this loop converges.
|
|
15
|
+
|
|
16
|
+
## Steps
|
|
17
|
+
1. Run the install/build/test/lint commands recorded in `AGENTS.md` (the `commands` block).
|
|
18
|
+
2. If all green → **continue** to review.
|
|
19
|
+
3. If red → read the failure, fix, and re-run. Cap at **N=3** iterations (default).
|
|
20
|
+
4. If still red after N → **self-park**: leave a precise note (what failed, what you tried) and
|
|
21
|
+
move the task to `parked`. Never hand a red build to a reviewer.
|
|
22
|
+
|
|
23
|
+
## Notes
|
|
24
|
+
- Pin verification to your own HEAD; do not trust stale CI from an earlier commit.
|
|
25
|
+
- Record any new gotcha in `.agents/wiki/`.
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: skill-authoring
|
|
3
|
+
description: Admission bar and structure for writing a new skill, so the skill library stays lean and discoverable.
|
|
4
|
+
triggers:
|
|
5
|
+
- "create / add a new skill"
|
|
6
|
+
- "this procedure keeps coming up"
|
|
7
|
+
allowed-tools: Read Edit Grep Glob
|
|
8
|
+
argument-hint: "<skill-name>"
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# skill-authoring (principles 4, 8)
|
|
12
|
+
|
|
13
|
+
Skills are procedural memory. A bloated skill library is as useless as no library, so new skills
|
|
14
|
+
must clear an admission bar.
|
|
15
|
+
|
|
16
|
+
## Admission test (all must hold)
|
|
17
|
+
1. **Reusable, not one-off.** It encodes how to do *one recurring thing* well — not a single task.
|
|
18
|
+
2. **Not already covered.** Search `.agents/skills/`; if an existing skill is close, **sharpen it**
|
|
19
|
+
instead of adding a near-duplicate.
|
|
20
|
+
3. **A procedure, not a reflex.** Passive, always-on constraints belong in `.agents/rules/`, not a
|
|
21
|
+
skill.
|
|
22
|
+
|
|
23
|
+
## Structure
|
|
24
|
+
- `SKILL.md` with YAML frontmatter: `name`, a `description` **< 250 chars**, `triggers`,
|
|
25
|
+
`allowed-tools` (scope the blast radius), and an optional `argument-hint`.
|
|
26
|
+
- Body: short, imperative steps. Cap iteration loops and state the fallback.
|
|
27
|
+
- Put long reference material in a sibling `references/` file and link to it, so the skill itself
|
|
28
|
+
stays small and the agent loads detail only on demand.
|
|
29
|
+
- Scope `allowed-tools` to the minimum (e.g. `Bash Read Grep Glob`).
|
|
30
|
+
|
|
31
|
+
## Keep surfaces in sync
|
|
32
|
+
When you add or remove a skill, update the skills inventory in `AGENTS.md` (the
|
|
33
|
+
`AGENTRIG:skills-inventory` block) so every surface advertises the same set. If the repo mirrors
|
|
34
|
+
skills across vendor dirs (`.claude`/`.copilot`/`.agents`/…), they should all point at one canonical
|
|
35
|
+
source (AgentRig wires these as symlinks).
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: skill-improver
|
|
3
|
+
description: Turn a reviewer/judge failure into an instruction-surface change that passes a prevention test.
|
|
4
|
+
triggers:
|
|
5
|
+
- "a mistake recurred"
|
|
6
|
+
- "reviewer feedback points at a missing rule/skill"
|
|
7
|
+
allowed-tools: Read Edit Grep Glob
|
|
8
|
+
argument-hint: "<short description of the failure>"
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# skill-improver (principle 8)
|
|
12
|
+
|
|
13
|
+
Every mistake is a prompt bug. Convert it into a durable instruction change.
|
|
14
|
+
|
|
15
|
+
## Procedure
|
|
16
|
+
1. Identify the **instruction surface** that should have prevented the failure (a rule, a skill, or
|
|
17
|
+
`AGENTS.md` Critical Rules).
|
|
18
|
+
2. Propose the minimal wording change.
|
|
19
|
+
3. **Prevention test (mandatory):** would this new wording have changed the *original* failure? If
|
|
20
|
+
not, the change is rejected.
|
|
21
|
+
4. **Admission test:** does an existing rule already cover this? If yes, do not duplicate — sharpen
|
|
22
|
+
the existing one. Duplication is what kills wikis.
|
|
23
|
+
5. Record the gotcha in `.agents/wiki/` (central, committed) or the local wiki if repo-specific.
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: verify-loop
|
|
3
|
+
description: General wait → inspect → fix (max 3) → self-park loop for any post-action verification (build, tests, CI, visual, lint).
|
|
4
|
+
triggers:
|
|
5
|
+
- after pushing changes / before requesting review
|
|
6
|
+
- "an async check (CI, visual, e2e) needs to be waited on and acted upon"
|
|
7
|
+
allowed-tools: Bash Read Grep Glob
|
|
8
|
+
argument-hint: "[--max-iterations N] [--check <command-or-workflow>]"
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# verify-loop (principle 5, generalized)
|
|
12
|
+
|
|
13
|
+
A reusable decision loop for converging on *any* verification signal before handoff. Generalizes the
|
|
14
|
+
visual-self-verify pattern to builds, tests, CI runs, lint, or e2e.
|
|
15
|
+
|
|
16
|
+
## Loop
|
|
17
|
+
1. **Trigger** the check (or wait for the async one pinned to your own HEAD — never trust a stale
|
|
18
|
+
result from an earlier commit).
|
|
19
|
+
2. **Inspect** the result:
|
|
20
|
+
- **Green / no unintended change →** *continue* (proceed to review/handoff).
|
|
21
|
+
- **Red / unintended change →** go to step 3.
|
|
22
|
+
- **Intended but human-gated change →** *self-park* (see below). Do not iterate.
|
|
23
|
+
3. **Fix** the root cause and re-run. Cap at **N = 3** iterations (default).
|
|
24
|
+
4. **After N failures →** stop iterating and take a recovery path: self-park with a precise note, or
|
|
25
|
+
escalate. Do not loop indefinitely.
|
|
26
|
+
|
|
27
|
+
## Self-park
|
|
28
|
+
When the right next step needs a human (low reversibility, an intended diff behind a human-only
|
|
29
|
+
gate, or repeated failure), leave a clear note describing what you saw and what you tried, move the
|
|
30
|
+
task to `parked`, and **never apply the human-only label yourself**.
|
|
31
|
+
|
|
32
|
+
## Notes
|
|
33
|
+
- Pin every check to your current HEAD.
|
|
34
|
+
- Record any new gotcha in `.agents/wiki/`.
|
|
35
|
+
- This is the engine behind `self-verify`; use `verify-loop` whenever the signal is asynchronous.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Agent wiki — tiered memory (principle 8)
|
|
2
|
+
|
|
3
|
+
Every mistake is a prompt bug. This is where durable gotchas live.
|
|
4
|
+
|
|
5
|
+
## Tiers
|
|
6
|
+
1. **Central wiki (this directory, committed):** repo-wide, reviewed gotchas. CODEOWNERS-gate it.
|
|
7
|
+
2. **Local wiki (git-ignored):** machine/contributor-specific notes. Add `*.local.md` to
|
|
8
|
+
`.gitignore`.
|
|
9
|
+
3. **Session scratch:** ephemeral working notes (e.g. `plan.md`); never a substitute for the wiki.
|
|
10
|
+
|
|
11
|
+
## Admission test (strict — duplication kills wikis)
|
|
12
|
+
Before adding an entry, confirm no existing entry covers it. If one does, **sharpen it** instead of
|
|
13
|
+
adding a near-duplicate. Each entry should be: a title, the symptom, the root cause, the fix, and a
|
|
14
|
+
one-line prevention.
|
|
15
|
+
|
|
16
|
+
## Entry template
|
|
17
|
+
```markdown
|
|
18
|
+
### <short title>
|
|
19
|
+
- **Symptom:** what went wrong / how it showed up
|
|
20
|
+
- **Cause:** the real root cause
|
|
21
|
+
- **Fix:** the change that resolved it
|
|
22
|
+
- **Prevention:** the rule/skill wording that would have stopped it (feed to skill-improver)
|
|
23
|
+
```
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# <short, greppable title>
|
|
2
|
+
|
|
3
|
+
> Copy this file to `.agents/wiki/<slug>.md` for a new gotcha. Keep the four sections.
|
|
4
|
+
|
|
5
|
+
## Symptoms
|
|
6
|
+
What it looked like / how it surfaced (error text, behavior). Make it greppable.
|
|
7
|
+
|
|
8
|
+
## Root cause
|
|
9
|
+
The real underlying cause — not the symptom.
|
|
10
|
+
|
|
11
|
+
## Fix
|
|
12
|
+
The exact change that resolved it. Include commands/snippets where useful.
|
|
13
|
+
|
|
14
|
+
## Prevention
|
|
15
|
+
The rule/skill wording that would have stopped this. If a rule or skill should have caught it, feed
|
|
16
|
+
this to `skill-improver`. Add a `## Related` section with links if relevant.
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Agent wiki — index & routing
|
|
2
|
+
|
|
3
|
+
This wiki holds **learned gotchas and war stories** — durable lessons an agent discovered the hard
|
|
4
|
+
way. It is **not** a mirror of the docs or skills.
|
|
5
|
+
|
|
6
|
+
## What belongs where
|
|
7
|
+
| Kind of knowledge | Goes in |
|
|
8
|
+
|-------------------|---------|
|
|
9
|
+
| A gotcha / non-obvious failure + its fix | **this wiki** (`.agents/wiki/<slug>.md`) |
|
|
10
|
+
| A repeatable procedure ("how to do X") | a skill (`.agents/skills/`) |
|
|
11
|
+
| A passive, always-on constraint | a rule (`.agents/rules/`) |
|
|
12
|
+
| Repo-wide policy / critical rules | `AGENTS.md` |
|
|
13
|
+
| Common error → fix lookups | `troubleshooting.md` (in this dir) |
|
|
14
|
+
|
|
15
|
+
If a gotcha becomes a reusable procedure, **promote it to a skill** and leave a one-line pointer
|
|
16
|
+
here.
|
|
17
|
+
|
|
18
|
+
## Tiers (principle 8)
|
|
19
|
+
1. **Central wiki (this dir, committed):** repo-wide, reviewed gotchas. CODEOWNERS-gate it.
|
|
20
|
+
2. **Local wiki (git-ignored `*.local.md`):** machine/contributor-specific notes.
|
|
21
|
+
3. **Session scratch:** ephemeral working notes; never a substitute for the wiki.
|
|
22
|
+
|
|
23
|
+
## Index
|
|
24
|
+
_Add a one-line link per entry as you create it, newest first._
|
|
25
|
+
- (none yet)
|
|
26
|
+
|
|
27
|
+
## Admission test (strict — duplication kills wikis)
|
|
28
|
+
Before adding an entry, confirm no existing entry covers it. If one does, **sharpen it** instead of
|
|
29
|
+
adding a near-duplicate. Use the format in `_TEMPLATE.md`.
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Troubleshooting — common errors and fixes
|
|
2
|
+
|
|
3
|
+
A shared, living list of errors agents (and humans) hit in this repo and the fix that worked. Prefer
|
|
4
|
+
adding here over re-debugging the same thing twice. Keep each entry tight.
|
|
5
|
+
|
|
6
|
+
## Format
|
|
7
|
+
```
|
|
8
|
+
### <error message or symptom, greppable>
|
|
9
|
+
- **When:** the situation it shows up in
|
|
10
|
+
- **Fix:** the exact thing that resolves it
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Entries
|
|
14
|
+
_None yet. Add new entries below this line (newest first)._
|
package/package.json
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@doidor/agentrig",
|
|
3
|
+
"version": "0.5.3",
|
|
4
|
+
"description": "AgentRig — an agentic meta-harness. A CLI that investigates a repository and installs (and evaluates) a best-practice agent harness.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": {
|
|
7
|
+
"agentrig": "dist/cli.js"
|
|
8
|
+
},
|
|
9
|
+
"files": [
|
|
10
|
+
"dist",
|
|
11
|
+
"knowledge",
|
|
12
|
+
"README.md",
|
|
13
|
+
"LICENSE"
|
|
14
|
+
],
|
|
15
|
+
"publishConfig": {
|
|
16
|
+
"access": "public"
|
|
17
|
+
},
|
|
18
|
+
"repository": {
|
|
19
|
+
"type": "git",
|
|
20
|
+
"url": "git+https://github.com/doidor/agentrig.git"
|
|
21
|
+
},
|
|
22
|
+
"homepage": "https://github.com/doidor/agentrig#readme",
|
|
23
|
+
"bugs": {
|
|
24
|
+
"url": "https://github.com/doidor/agentrig/issues"
|
|
25
|
+
},
|
|
26
|
+
"engines": {
|
|
27
|
+
"node": ">=22.0.0"
|
|
28
|
+
},
|
|
29
|
+
"scripts": {
|
|
30
|
+
"build": "tsc -p tsconfig.json",
|
|
31
|
+
"dev": "tsc -p tsconfig.json --watch",
|
|
32
|
+
"clean": "rm -rf dist",
|
|
33
|
+
"prepare": "npm run build",
|
|
34
|
+
"prepublishOnly": "npm run clean && npm run build && npm test",
|
|
35
|
+
"start": "node dist/cli.js",
|
|
36
|
+
"test": "npm run build && node --test test/*.test.mjs",
|
|
37
|
+
"selftest": "node dist/cli.js eval --static . || true",
|
|
38
|
+
"changeset": "changeset",
|
|
39
|
+
"version-packages": "changeset version",
|
|
40
|
+
"release": "changeset publish"
|
|
41
|
+
},
|
|
42
|
+
"keywords": [
|
|
43
|
+
"agent",
|
|
44
|
+
"harness",
|
|
45
|
+
"copilot",
|
|
46
|
+
"agentic",
|
|
47
|
+
"scaffold",
|
|
48
|
+
"evaluation",
|
|
49
|
+
"ai"
|
|
50
|
+
],
|
|
51
|
+
"license": "MIT",
|
|
52
|
+
"dependencies": {
|
|
53
|
+
"@github/copilot-sdk": "^1.0.0",
|
|
54
|
+
"zod": "^4.3.6"
|
|
55
|
+
},
|
|
56
|
+
"peerDependencies": {
|
|
57
|
+
"@anthropic-ai/claude-agent-sdk": ">=0.3.0"
|
|
58
|
+
},
|
|
59
|
+
"peerDependenciesMeta": {
|
|
60
|
+
"@anthropic-ai/claude-agent-sdk": {
|
|
61
|
+
"optional": true
|
|
62
|
+
}
|
|
63
|
+
},
|
|
64
|
+
"devDependencies": {
|
|
65
|
+
"@changesets/changelog-github": "^0.7.0",
|
|
66
|
+
"@changesets/cli": "^2.31.0",
|
|
67
|
+
"@types/node": "^22.0.0",
|
|
68
|
+
"typescript": "^5.6.0"
|
|
69
|
+
}
|
|
70
|
+
}
|