@wardrail/plugin 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +33 -33
- package/.mcp.json +13 -13
- package/LICENSE +21 -0
- package/README.md +52 -52
- package/hooks/hooks.json +15 -15
- package/hooks/tasks-session-start.mjs +58 -58
- package/hooks/verify-checkpoint.mjs +201 -200
- package/package.json +27 -26
- package/skills/checkpoint/SKILL.md +61 -61
- package/skills/task/SKILL.md +69 -69
|
@@ -1,33 +1,33 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "wardrail",
|
|
3
|
-
"displayName": "Wardrail",
|
|
4
|
-
"version": "0.1.0",
|
|
5
|
-
"description": "Keep your coding agent on the rails: consult your project's Wardrail contract while it writes code, plus a checkpoint->clear->resume workflow with checkpoints that are machine-verified, not self-reported.",
|
|
6
|
-
"author": { "name": "Ghostables Ltd", "url": "https://wardrail.io" },
|
|
7
|
-
"homepage": "https://wardrail.io",
|
|
8
|
-
"license": "UNLICENSED",
|
|
9
|
-
"keywords": ["wardrail", "guardrails", "mcp", "checkpoint", "context", "ai"],
|
|
10
|
-
"userConfig": {
|
|
11
|
-
"WARDRAIL_URL": {
|
|
12
|
-
"type": "string",
|
|
13
|
-
"title": "Wardrail URL",
|
|
14
|
-
"description": "Your Wardrail server. Leave the default unless self-hosting.",
|
|
15
|
-
"default": "https://wardrail.io",
|
|
16
|
-
"required": false
|
|
17
|
-
},
|
|
18
|
-
"WARDRAIL_INGEST_TOKEN": {
|
|
19
|
-
"type": "string",
|
|
20
|
-
"title": "Wardrail ingest token",
|
|
21
|
-
"description": "Project-scoped token from Wardrail -> Trust -> Attest from CI. Scopes reads to exactly one project; never grants account access.",
|
|
22
|
-
"sensitive": true,
|
|
23
|
-
"required": true
|
|
24
|
-
},
|
|
25
|
-
"ANTHROPIC_API_KEY": {
|
|
26
|
-
"type": "string",
|
|
27
|
-
"title": "Anthropic API key (optional)",
|
|
28
|
-
"description": "Only needed for wardrail_review_diff. Stays on your machine and goes straight to Anthropic; Wardrail never sees it.",
|
|
29
|
-
"sensitive": true,
|
|
30
|
-
"required": false
|
|
31
|
-
}
|
|
32
|
-
}
|
|
33
|
-
}
|
|
1
|
+
{
|
|
2
|
+
"name": "wardrail",
|
|
3
|
+
"displayName": "Wardrail",
|
|
4
|
+
"version": "0.1.2",
|
|
5
|
+
"description": "Keep your coding agent on the rails: consult your project's Wardrail contract while it writes code, plus a checkpoint->clear->resume workflow with checkpoints that are machine-verified, not self-reported.",
|
|
6
|
+
"author": { "name": "Ghostables Ltd", "url": "https://wardrail.ghostables.io" },
|
|
7
|
+
"homepage": "https://wardrail.ghostables.io",
|
|
8
|
+
"license": "MIT",
|
|
9
|
+
"keywords": ["wardrail", "guardrails", "mcp", "checkpoint", "context", "ai"],
|
|
10
|
+
"userConfig": {
|
|
11
|
+
"WARDRAIL_URL": {
|
|
12
|
+
"type": "string",
|
|
13
|
+
"title": "Wardrail URL",
|
|
14
|
+
"description": "Your Wardrail server. Leave the default unless self-hosting.",
|
|
15
|
+
"default": "https://wardrail.ghostables.io",
|
|
16
|
+
"required": false
|
|
17
|
+
},
|
|
18
|
+
"WARDRAIL_INGEST_TOKEN": {
|
|
19
|
+
"type": "string",
|
|
20
|
+
"title": "Wardrail ingest token",
|
|
21
|
+
"description": "Project-scoped token from Wardrail -> Trust -> Attest from CI. Scopes reads to exactly one project; never grants account access.",
|
|
22
|
+
"sensitive": true,
|
|
23
|
+
"required": true
|
|
24
|
+
},
|
|
25
|
+
"ANTHROPIC_API_KEY": {
|
|
26
|
+
"type": "string",
|
|
27
|
+
"title": "Anthropic API key (optional)",
|
|
28
|
+
"description": "Only needed for wardrail_review_diff. Stays on your machine and goes straight to Anthropic; Wardrail never sees it.",
|
|
29
|
+
"sensitive": true,
|
|
30
|
+
"required": false
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
}
|
package/.mcp.json
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
{
|
|
2
|
-
"mcpServers": {
|
|
3
|
-
"wardrail": {
|
|
4
|
-
"command": "npx",
|
|
5
|
-
"args": ["-y", "@wardrail/mcp"],
|
|
6
|
-
"env": {
|
|
7
|
-
"WARDRAIL_URL": "${user_config.WARDRAIL_URL}",
|
|
8
|
-
"WARDRAIL_INGEST_TOKEN": "${user_config.WARDRAIL_INGEST_TOKEN}",
|
|
9
|
-
"ANTHROPIC_API_KEY": "${user_config.ANTHROPIC_API_KEY}"
|
|
10
|
-
}
|
|
11
|
-
}
|
|
12
|
-
}
|
|
13
|
-
}
|
|
1
|
+
{
|
|
2
|
+
"mcpServers": {
|
|
3
|
+
"wardrail": {
|
|
4
|
+
"command": "npx",
|
|
5
|
+
"args": ["-y", "@wardrail/mcp"],
|
|
6
|
+
"env": {
|
|
7
|
+
"WARDRAIL_URL": "${user_config.WARDRAIL_URL}",
|
|
8
|
+
"WARDRAIL_INGEST_TOKEN": "${user_config.WARDRAIL_INGEST_TOKEN}",
|
|
9
|
+
"ANTHROPIC_API_KEY": "${user_config.ANTHROPIC_API_KEY}"
|
|
10
|
+
}
|
|
11
|
+
}
|
|
12
|
+
}
|
|
13
|
+
}
|
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Ghostables Ltd
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -1,52 +1,52 @@
|
|
|
1
|
-
# @wardrail/plugin
|
|
2
|
-
|
|
3
|
-
The **Wardrail Claude Code plugin**. One install gives your coding agent two things:
|
|
4
|
-
|
|
5
|
-
1. **The Wardrail contract, while it codes** — the four [`@wardrail/mcp`](https://www.npmjs.com/package/@wardrail/mcp)
|
|
6
|
-
tools (`get_contract`, `check_path`, `get_findings`, `review_diff`), so the agent stays
|
|
7
|
-
on the rails *before* a violation lands.
|
|
8
|
-
2. **A context-saving workflow** — `/task` and `/checkpoint` slash commands, a SessionStart
|
|
9
|
-
resume listing, and a machine **verifier** so a checkpoint *can't lie*: it reconciles a
|
|
10
|
-
task file's claims against `git`, a real test run, and a diff scan before it's allowed to
|
|
11
|
-
be marked done.
|
|
12
|
-
|
|
13
|
-
This is the same anti-drift, verify-don't-trust thesis Wardrail applies to your *code*,
|
|
14
|
-
turned on the agent's own working memory.
|
|
15
|
-
|
|
16
|
-
## Install
|
|
17
|
-
|
|
18
|
-
```bash
|
|
19
|
-
# add the marketplace (served by Wardrail), then install the plugin
|
|
20
|
-
claude plugin marketplace add https://wardrail.io/marketplace.json
|
|
21
|
-
claude plugin install wardrail@wardrail
|
|
22
|
-
```
|
|
23
|
-
|
|
24
|
-
At enable time you'll be asked for:
|
|
25
|
-
|
|
26
|
-
| Value | Required | What it is |
|
|
27
|
-
|---|---|---|
|
|
28
|
-
| `WARDRAIL_URL` | no | Your Wardrail server. Defaults to `https://wardrail.io`. |
|
|
29
|
-
| `WARDRAIL_INGEST_TOKEN` | **yes** | Project-scoped token from Wardrail → **Trust → Attest from CI**. Scopes reads to one project; never grants account access. |
|
|
30
|
-
| `ANTHROPIC_API_KEY` | no | Only for `wardrail_review_diff`. Stays on your machine; Wardrail never sees it. |
|
|
31
|
-
|
|
32
|
-
These feed the bundled MCP server's environment via `${user_config.*}`. The `@wardrail/mcp`
|
|
33
|
-
server is fetched on demand by `npx`.
|
|
34
|
-
|
|
35
|
-
## What's in the box
|
|
36
|
-
|
|
37
|
-
```
|
|
38
|
-
.claude-plugin/plugin.json manifest + userConfig prompts
|
|
39
|
-
skills/task/SKILL.md /task new|resume <slug>
|
|
40
|
-
skills/checkpoint/SKILL.md /checkpoint (runs the verifier)
|
|
41
|
-
hooks/hooks.json SessionStart -> resume listing
|
|
42
|
-
hooks/tasks-session-start.mjs
|
|
43
|
-
hooks/verify-checkpoint.mjs the machine verifier
|
|
44
|
-
.mcp.json the Wardrail MCP server (npx -y @wardrail/mcp)
|
|
45
|
-
```
|
|
46
|
-
|
|
47
|
-
## The workflow
|
|
48
|
-
|
|
49
|
-
`/checkpoint` → `/clear` → `/task resume <slug>`. Task files live per-project in
|
|
50
|
-
`./tasks/<slug>.md` and carry a small, honest snapshot so a fresh session rehydrates from
|
|
51
|
-
~3k tokens instead of a 150k transcript. `/checkpoint` stamps a tamper-evident
|
|
52
|
-
`## Verification` block; `status: done` over a failed check is downgraded to FAIL.
|
|
1
|
+
# @wardrail/plugin
|
|
2
|
+
|
|
3
|
+
The **Wardrail Claude Code plugin**. One install gives your coding agent two things:
|
|
4
|
+
|
|
5
|
+
1. **The Wardrail contract, while it codes** — the four [`@wardrail/mcp`](https://www.npmjs.com/package/@wardrail/mcp)
|
|
6
|
+
tools (`get_contract`, `check_path`, `get_findings`, `review_diff`), so the agent stays
|
|
7
|
+
on the rails *before* a violation lands.
|
|
8
|
+
2. **A context-saving workflow** — `/task` and `/checkpoint` slash commands, a SessionStart
|
|
9
|
+
resume listing, and a machine **verifier** so a checkpoint *can't lie*: it reconciles a
|
|
10
|
+
task file's claims against `git`, a real test run, and a diff scan before it's allowed to
|
|
11
|
+
be marked done.
|
|
12
|
+
|
|
13
|
+
This is the same anti-drift, verify-don't-trust thesis Wardrail applies to your *code*,
|
|
14
|
+
turned on the agent's own working memory.
|
|
15
|
+
|
|
16
|
+
## Install
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
# add the marketplace (served by Wardrail), then install the plugin
|
|
20
|
+
claude plugin marketplace add https://wardrail.ghostables.io/marketplace.json
|
|
21
|
+
claude plugin install wardrail@wardrail
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
At enable time you'll be asked for:
|
|
25
|
+
|
|
26
|
+
| Value | Required | What it is |
|
|
27
|
+
|---|---|---|
|
|
28
|
+
| `WARDRAIL_URL` | no | Your Wardrail server. Defaults to `https://wardrail.ghostables.io`. |
|
|
29
|
+
| `WARDRAIL_INGEST_TOKEN` | **yes** | Project-scoped token from Wardrail → **Trust → Attest from CI**. Scopes reads to one project; never grants account access. |
|
|
30
|
+
| `ANTHROPIC_API_KEY` | no | Only for `wardrail_review_diff`. Stays on your machine; Wardrail never sees it. |
|
|
31
|
+
|
|
32
|
+
These feed the bundled MCP server's environment via `${user_config.*}`. The `@wardrail/mcp`
|
|
33
|
+
server is fetched on demand by `npx`.
|
|
34
|
+
|
|
35
|
+
## What's in the box
|
|
36
|
+
|
|
37
|
+
```
|
|
38
|
+
.claude-plugin/plugin.json manifest + userConfig prompts
|
|
39
|
+
skills/task/SKILL.md /task new|resume <slug>
|
|
40
|
+
skills/checkpoint/SKILL.md /checkpoint (runs the verifier)
|
|
41
|
+
hooks/hooks.json SessionStart -> resume listing
|
|
42
|
+
hooks/tasks-session-start.mjs
|
|
43
|
+
hooks/verify-checkpoint.mjs the machine verifier
|
|
44
|
+
.mcp.json the Wardrail MCP server (npx -y @wardrail/mcp)
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## The workflow
|
|
48
|
+
|
|
49
|
+
`/checkpoint` → `/clear` → `/task resume <slug>`. Task files live per-project in
|
|
50
|
+
`./tasks/<slug>.md` and carry a small, honest snapshot so a fresh session rehydrates from
|
|
51
|
+
~3k tokens instead of a 150k transcript. `/checkpoint` stamps a tamper-evident
|
|
52
|
+
`## Verification` block; `status: done` over a failed check is downgraded to FAIL.
|
package/hooks/hooks.json
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
{
|
|
2
|
-
"hooks": {
|
|
3
|
-
"SessionStart": [
|
|
4
|
-
{
|
|
5
|
-
"hooks": [
|
|
6
|
-
{
|
|
7
|
-
"type": "command",
|
|
8
|
-
"command": "node",
|
|
9
|
-
"args": ["${CLAUDE_PLUGIN_ROOT}/hooks/tasks-session-start.mjs"]
|
|
10
|
-
}
|
|
11
|
-
]
|
|
12
|
-
}
|
|
13
|
-
]
|
|
14
|
-
}
|
|
15
|
-
}
|
|
1
|
+
{
|
|
2
|
+
"hooks": {
|
|
3
|
+
"SessionStart": [
|
|
4
|
+
{
|
|
5
|
+
"hooks": [
|
|
6
|
+
{
|
|
7
|
+
"type": "command",
|
|
8
|
+
"command": "node",
|
|
9
|
+
"args": ["${CLAUDE_PLUGIN_ROOT}/hooks/tasks-session-start.mjs"]
|
|
10
|
+
}
|
|
11
|
+
]
|
|
12
|
+
}
|
|
13
|
+
]
|
|
14
|
+
}
|
|
15
|
+
}
|
|
@@ -1,58 +1,58 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
// SessionStart hook for the tasks plugin.
|
|
3
|
-
// Prints active task files in the current project's ./tasks/ so resuming is one step.
|
|
4
|
-
// Silent (no output) when there is no ./tasks/ dir or no active task — costs nothing
|
|
5
|
-
// in projects that don't use the workflow.
|
|
6
|
-
|
|
7
|
-
import { readFileSync, readdirSync, existsSync } from "node:fs";
|
|
8
|
-
import { join } from "node:path";
|
|
9
|
-
|
|
10
|
-
function readStdin() {
|
|
11
|
-
try {
|
|
12
|
-
return readFileSync(0, "utf8");
|
|
13
|
-
} catch {
|
|
14
|
-
return "";
|
|
15
|
-
}
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
let cwd = process.cwd();
|
|
19
|
-
try {
|
|
20
|
-
const input = JSON.parse(readStdin() || "{}");
|
|
21
|
-
if (input.cwd) cwd = input.cwd;
|
|
22
|
-
} catch {
|
|
23
|
-
// no/!json stdin — fall back to process.cwd()
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
const tasksDir = join(cwd, "tasks");
|
|
27
|
-
if (!existsSync(tasksDir)) process.exit(0);
|
|
28
|
-
|
|
29
|
-
const active = [];
|
|
30
|
-
for (const name of readdirSync(tasksDir)) {
|
|
31
|
-
if (!name.endsWith(".md")) continue;
|
|
32
|
-
let text;
|
|
33
|
-
try {
|
|
34
|
-
text = readFileSync(join(tasksDir, name), "utf8");
|
|
35
|
-
} catch {
|
|
36
|
-
continue;
|
|
37
|
-
}
|
|
38
|
-
const fm = text.match(/^---\r?\n([\s\S]*?)\r?\n---/);
|
|
39
|
-
if (!fm || !/^status:\s*active\s*$/m.test(fm[1])) continue;
|
|
40
|
-
|
|
41
|
-
const slug = name.replace(/\.md$/, "");
|
|
42
|
-
const next = text.match(/##\s*Next step\s*\r?\n+([^\r\n]+)/);
|
|
43
|
-
// Last machine verdict from the checkpoint verifier, if any — surfaced so a resume
|
|
44
|
-
// sees up front whether the prior checkpoint left open issues (WARN/FAIL).
|
|
45
|
-
const verdict = text.match(/^-\s*Verdict:\s*(\S+)\s*(PASS|WARN|FAIL)/m);
|
|
46
|
-
active.push({ slug, next: next ? next[1].trim() : "", verdict: verdict ? `${verdict[1]} ${verdict[2]}` : "" });
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
if (active.length === 0) process.exit(0);
|
|
50
|
-
|
|
51
|
-
const lines = ["Active task file(s) in ./tasks/ — resume cheaply with `/task resume <slug>`:"];
|
|
52
|
-
for (const t of active) {
|
|
53
|
-
let line = `- ${t.slug}`;
|
|
54
|
-
if (t.verdict) line += ` [last checkpoint: ${t.verdict}]`;
|
|
55
|
-
if (t.next) line += ` — next: ${t.next}`;
|
|
56
|
-
lines.push(line);
|
|
57
|
-
}
|
|
58
|
-
process.stdout.write(lines.join("\n") + "\n");
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// SessionStart hook for the tasks plugin.
|
|
3
|
+
// Prints active task files in the current project's ./tasks/ so resuming is one step.
|
|
4
|
+
// Silent (no output) when there is no ./tasks/ dir or no active task — costs nothing
|
|
5
|
+
// in projects that don't use the workflow.
|
|
6
|
+
|
|
7
|
+
import { readFileSync, readdirSync, existsSync } from "node:fs";
|
|
8
|
+
import { join } from "node:path";
|
|
9
|
+
|
|
10
|
+
function readStdin() {
|
|
11
|
+
try {
|
|
12
|
+
return readFileSync(0, "utf8");
|
|
13
|
+
} catch {
|
|
14
|
+
return "";
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
let cwd = process.cwd();
|
|
19
|
+
try {
|
|
20
|
+
const input = JSON.parse(readStdin() || "{}");
|
|
21
|
+
if (input.cwd) cwd = input.cwd;
|
|
22
|
+
} catch {
|
|
23
|
+
// stdin was empty or not valid JSON — fall back to process.cwd()
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
const tasksDir = join(cwd, "tasks");
|
|
27
|
+
if (!existsSync(tasksDir)) process.exit(0);
|
|
28
|
+
|
|
29
|
+
const active = [];
|
|
30
|
+
for (const name of readdirSync(tasksDir)) {
|
|
31
|
+
if (!name.endsWith(".md")) continue;
|
|
32
|
+
let text;
|
|
33
|
+
try {
|
|
34
|
+
text = readFileSync(join(tasksDir, name), "utf8");
|
|
35
|
+
} catch {
|
|
36
|
+
continue;
|
|
37
|
+
}
|
|
38
|
+
const fm = text.match(/^---\r?\n([\s\S]*?)\r?\n---/);
|
|
39
|
+
if (!fm || !/^status:\s*active\s*$/m.test(fm[1])) continue;
|
|
40
|
+
|
|
41
|
+
const slug = name.replace(/\.md$/, "");
|
|
42
|
+
const next = text.match(/##\s*Next step\s*\r?\n+([^\r\n]+)/);
|
|
43
|
+
// Last machine verdict from the checkpoint verifier, if any — surfaced so a resume
|
|
44
|
+
// sees up front whether the prior checkpoint left open issues (WARN/FAIL).
|
|
45
|
+
const verdict = text.match(/^-\s*Verdict:\s*(\S+)\s*(PASS|WARN|FAIL)/m);
|
|
46
|
+
active.push({ slug, next: next ? next[1].trim() : "", verdict: verdict ? `${verdict[1]} ${verdict[2]}` : "" });
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
if (active.length === 0) process.exit(0);
|
|
50
|
+
|
|
51
|
+
const lines = ["Active task file(s) in ./tasks/ — resume cheaply with `/task resume <slug>`:"];
|
|
52
|
+
for (const t of active) {
|
|
53
|
+
let line = `- ${t.slug}`;
|
|
54
|
+
if (t.verdict) line += ` [last checkpoint: ${t.verdict}]`;
|
|
55
|
+
if (t.next) line += ` — next: ${t.next}`;
|
|
56
|
+
lines.push(line);
|
|
57
|
+
}
|
|
58
|
+
process.stdout.write(lines.join("\n") + "\n");
|
|
@@ -1,200 +1,201 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
// verify-checkpoint.mjs — machine-produced verification for the tasks plugin.
|
|
3
|
-
//
|
|
4
|
-
// Reconciles a task file's CLAIMS against ground truth, then stamps a tamper-evident
|
|
5
|
-
// `## Verification` block into the file. The model does not narrate this — the script
|
|
6
|
-
// observes git, runs the tests, and scans the diff itself, so a checkpoint cannot claim
|
|
7
|
-
// work that didn't happen. A `status: done` task with any failed check is downgraded to
|
|
8
|
-
// FAIL.
|
|
9
|
-
//
|
|
10
|
-
// Usage: node verify-checkpoint.mjs [slug]
|
|
11
|
-
// cwd must be the project root. With no slug, auto-detects the single active task.
|
|
12
|
-
//
|
|
13
|
-
// v1 checks: (1) git Files-touched reconciliation, (2) real test run
|
|
14
|
-
// (tasks/.verify.json override, else package.json `test`), (3) honesty scan of the diff
|
|
15
|
-
// for TODO/FIXME/placeholder. Each check degrades gracefully and never reports a pass
|
|
16
|
-
// it didn't observe.
|
|
17
|
-
|
|
18
|
-
import { readFileSync, writeFileSync, readdirSync, existsSync } from "node:fs";
|
|
19
|
-
import { execSync } from "node:child_process";
|
|
20
|
-
import { join, isAbsolute } from "node:path";
|
|
21
|
-
import { homedir } from "node:os";
|
|
22
|
-
|
|
23
|
-
const cwd = process.cwd();
|
|
24
|
-
const tasksDir = join(cwd, "tasks");
|
|
25
|
-
|
|
26
|
-
function fail(msg) {
|
|
27
|
-
process.stderr.write(msg + "\n");
|
|
28
|
-
process.exit(1);
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
// ---- locate the task file -------------------------------------------------
|
|
32
|
-
let slug = process.argv[2];
|
|
33
|
-
if (!slug) {
|
|
34
|
-
if (!existsSync(tasksDir)) fail("No ./tasks/ directory and no slug given.");
|
|
35
|
-
const active = readdirSync(tasksDir).filter((n) => {
|
|
36
|
-
if (!n.endsWith(".md")) return false;
|
|
37
|
-
const fm = readFileSync(join(tasksDir, n), "utf8").match(/^---\r?\n([\s\S]*?)\r?\n---/);
|
|
38
|
-
return fm && /^status:\s*active\s*$/m.test(fm[1]);
|
|
39
|
-
});
|
|
40
|
-
if (active.length === 1) slug = active[0].replace(/\.md$/, "");
|
|
41
|
-
else fail(`Need a slug — ${active.length} active tasks found.`);
|
|
42
|
-
}
|
|
43
|
-
const taskPath = join(tasksDir, `${slug}.md`);
|
|
44
|
-
if (!existsSync(taskPath)) fail(`No task file at tasks/${slug}.md`);
|
|
45
|
-
let doc = readFileSync(taskPath, "utf8");
|
|
46
|
-
|
|
47
|
-
// ---- parse claims ---------------------------------------------------------
|
|
48
|
-
const status = (doc.match(/^status:\s*(\w+)/m) || [, "active"])[1];
|
|
49
|
-
|
|
50
|
-
const ftSection = doc.match(/##\s*Files touched\s*\r?\n([\s\S]*?)(?:\r?\n##\s|\s*$)/);
|
|
51
|
-
const claimedFiles = [];
|
|
52
|
-
if (ftSection) {
|
|
53
|
-
for (const line of ftSection[1].split(/\r?\n/)) {
|
|
54
|
-
const m = line.match(/^\s*-\s+(.+?)(?:\s+[—-]\s.*)?$/);
|
|
55
|
-
if (!m) continue;
|
|
56
|
-
const p = m[1].trim();
|
|
57
|
-
if (!p || p.startsWith("(")) continue; // skip parenthetical notes / placeholders — real paths never start with "("
|
|
58
|
-
claimedFiles.push(p.replace(/\\/g, "/"));
|
|
59
|
-
}
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
// ---- helpers --------------------------------------------------------------
|
|
63
|
-
|
|
64
|
-
const
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
p = p.
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
const
|
|
103
|
-
|
|
104
|
-
//
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
const
|
|
113
|
-
|
|
114
|
-
if (
|
|
115
|
-
if (
|
|
116
|
-
if (
|
|
117
|
-
if (
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
const
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
if (line.startsWith("
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
}
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
else verdict = "
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
const
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
`-
|
|
193
|
-
`-
|
|
194
|
-
`-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// verify-checkpoint.mjs — machine-produced verification for the tasks plugin.
|
|
3
|
+
//
|
|
4
|
+
// Reconciles a task file's CLAIMS against ground truth, then stamps a tamper-evident
|
|
5
|
+
// `## Verification` block into the file. The model does not narrate this — the script
|
|
6
|
+
// observes git, runs the tests, and scans the diff itself, so a checkpoint cannot claim
|
|
7
|
+
// work that didn't happen. A `status: done` task with any failed check is downgraded to
|
|
8
|
+
// FAIL.
|
|
9
|
+
//
|
|
10
|
+
// Usage: node verify-checkpoint.mjs [slug]
|
|
11
|
+
// cwd must be the project root. With no slug, auto-detects the single active task.
|
|
12
|
+
//
|
|
13
|
+
// v1 checks: (1) git Files-touched reconciliation, (2) real test run
|
|
14
|
+
// (tasks/.verify.json override, else package.json `test`), (3) honesty scan of the diff
|
|
15
|
+
// for TODO/FIXME/placeholder. Each check degrades gracefully and never reports a pass
|
|
16
|
+
// it didn't observe.
|
|
17
|
+
|
|
18
|
+
import { readFileSync, writeFileSync, readdirSync, existsSync } from "node:fs";
|
|
19
|
+
import { execSync, execFileSync } from "node:child_process";
|
|
20
|
+
import { join, isAbsolute } from "node:path";
|
|
21
|
+
import { homedir } from "node:os";
|
|
22
|
+
|
|
23
|
+
const cwd = process.cwd();
|
|
24
|
+
const tasksDir = join(cwd, "tasks");
|
|
25
|
+
|
|
26
|
+
function fail(msg) {
|
|
27
|
+
process.stderr.write(msg + "\n");
|
|
28
|
+
process.exit(1);
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
// ---- locate the task file -------------------------------------------------
|
|
32
|
+
let slug = process.argv[2];
|
|
33
|
+
if (!slug) {
|
|
34
|
+
if (!existsSync(tasksDir)) fail("No ./tasks/ directory and no slug given.");
|
|
35
|
+
const active = readdirSync(tasksDir).filter((n) => {
|
|
36
|
+
if (!n.endsWith(".md")) return false;
|
|
37
|
+
const fm = readFileSync(join(tasksDir, n), "utf8").match(/^---\r?\n([\s\S]*?)\r?\n---/);
|
|
38
|
+
return fm && /^status:\s*active\s*$/m.test(fm[1]);
|
|
39
|
+
});
|
|
40
|
+
if (active.length === 1) slug = active[0].replace(/\.md$/, "");
|
|
41
|
+
else fail(`Need a slug — ${active.length} active tasks found.`);
|
|
42
|
+
}
|
|
43
|
+
const taskPath = join(tasksDir, `${slug}.md`);
|
|
44
|
+
if (!existsSync(taskPath)) fail(`No task file at tasks/${slug}.md`);
|
|
45
|
+
let doc = readFileSync(taskPath, "utf8");
|
|
46
|
+
|
|
47
|
+
// ---- parse claims ---------------------------------------------------------
|
|
48
|
+
const status = (doc.match(/^status:\s*(\w+)/m) || [, "active"])[1];
|
|
49
|
+
|
|
50
|
+
const ftSection = doc.match(/##\s*Files touched\s*\r?\n([\s\S]*?)(?:\r?\n##\s|\s*$)/);
|
|
51
|
+
const claimedFiles = [];
|
|
52
|
+
if (ftSection) {
|
|
53
|
+
for (const line of ftSection[1].split(/\r?\n/)) {
|
|
54
|
+
const m = line.match(/^\s*-\s+(.+?)(?:\s+[—-]\s.*)?$/);
|
|
55
|
+
if (!m) continue;
|
|
56
|
+
const p = m[1].trim();
|
|
57
|
+
if (!p || p.startsWith("(")) continue; // skip parenthetical notes / placeholders — real paths never start with "("
|
|
58
|
+
claimedFiles.push(p.replace(/\\/g, "/"));
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
// ---- helpers --------------------------------------------------------------
|
|
63
|
+
// Pass args to git as an array via execFileSync — no shell is spawned, so no command string is ever shell-interpreted.
|
|
64
|
+
// Run `git` with the given arguments and return its stdout as a UTF-8 string.
// `cmd` is either an argv array or a whitespace-separated string; a string is split on
// whitespace runs so a stray double space never produces an empty argument.
// execFileSync spawns no shell; stdin and stderr are discarded, and a non-zero exit throws.
// maxBuffer is raised above Node's 1 MiB default so a large `git diff` is not aborted with
// ENOBUFS — the diff scan treats a thrown git call as an empty diff, which would otherwise
// read as a clean result.
const git = (cmd) =>
  execFileSync("git", Array.isArray(cmd) ? cmd : String(cmd).trim().split(/\s+/), {
    cwd,
    encoding: "utf8",
    stdio: ["ignore", "pipe", "ignore"],
    maxBuffer: 64 * 1024 * 1024,
  });
|
|
65
|
+
const isRepo = (() => {
|
|
66
|
+
try {
|
|
67
|
+
return git("rev-parse --is-inside-work-tree").trim() === "true";
|
|
68
|
+
} catch {
|
|
69
|
+
return false;
|
|
70
|
+
}
|
|
71
|
+
})();
|
|
72
|
+
|
|
73
|
+
// Resolve a claimed path to an absolute, forward-slashed path (expanding a leading ~).
|
|
74
|
+
const resolveClaim = (claim) => {
|
|
75
|
+
let p = claim;
|
|
76
|
+
if (p === "~" || p.startsWith("~/")) p = join(homedir(), p.slice(1));
|
|
77
|
+
else if (!isAbsolute(p)) p = join(cwd, p);
|
|
78
|
+
return p.replace(/\\/g, "/");
|
|
79
|
+
};
|
|
80
|
+
// Repo root (toplevel) — used to tell in-repo claims from ones the verifier can't see.
|
|
81
|
+
const repoRoot = (() => {
|
|
82
|
+
if (!isRepo) return cwd.replace(/\\/g, "/");
|
|
83
|
+
try { return git("rev-parse --show-toplevel").trim().replace(/\\/g, "/"); }
|
|
84
|
+
catch { return cwd.replace(/\\/g, "/"); }
|
|
85
|
+
})();
|
|
86
|
+
// Whether an absolute, forward-slashed path is the repo root itself or lies beneath it.
// Both sides get a trailing "/" before the prefix test so a sibling such as `/repo-old`
// is not mistaken for being inside `/repo`.
const insideRepo = (abs) => {
  const rootPrefix = `${repoRoot.replace(/\/+$/, "")}/`;
  return `${abs}/`.startsWith(rootPrefix);
};
|
|
87
|
+
|
|
88
|
+
// ---- check 1: files-touched reconciliation --------------------------------
|
|
89
|
+
let filesLine, filesOk = true;
|
|
90
|
+
if (!isRepo) {
|
|
91
|
+
filesLine = "⏭️ not a git repo — file claims unverified";
|
|
92
|
+
} else {
|
|
93
|
+
const changed = [];
|
|
94
|
+
for (const raw of git("status --porcelain").split(/\r?\n/)) {
|
|
95
|
+
if (!raw.trim()) continue;
|
|
96
|
+
let p = raw.slice(3).trim().replace(/^"|"$/g, "");
|
|
97
|
+
if (p.includes(" -> ")) p = p.split(" -> ")[1];
|
|
98
|
+
p = p.replace(/\\/g, "/");
|
|
99
|
+
if (p.startsWith("tasks/")) continue; // ignore the task file & its sidecars
|
|
100
|
+
changed.push(p);
|
|
101
|
+
}
|
|
102
|
+
// True when a claimed path and a path reported by `git status` name the same file.
// The two may be spelled relative to different roots, so either one may be a suffix of the
// other — but only on a "/" boundary, so a claim of `a.js` is not satisfied by a change to
// `src/data.js`.
const matches = (claim, real) => real === claim || real.endsWith("/" + claim) || claim.endsWith("/" + real);
|
|
103
|
+
const unmetAll = claimedFiles.filter((c) => !changed.some((r) => matches(c, r)));
|
|
104
|
+
// A claim git can't reconcile is only a contradiction if it's missing or in-repo-but-unchanged.
|
|
105
|
+
// One that exists on disk but outside this repo is unverifiable here, not a lie — don't fail on it.
|
|
106
|
+
const unmet = [], unverifiable = [];
|
|
107
|
+
for (const c of unmetAll) {
|
|
108
|
+
const abs = resolveClaim(c);
|
|
109
|
+
if (existsSync(abs) && !insideRepo(abs)) unverifiable.push(c);
|
|
110
|
+
else unmet.push(c);
|
|
111
|
+
}
|
|
112
|
+
const unclaimed = changed.filter((r) => !claimedFiles.some((c) => matches(c, r)));
|
|
113
|
+
const parts = [];
|
|
114
|
+
if (claimedFiles.length === 0) parts.push("no files claimed");
|
|
115
|
+
if (unmet.length) { parts.push("❌ claimed but not changed: " + unmet.join(", ")); filesOk = false; }
|
|
116
|
+
if (unverifiable.length) parts.push("⏭️ claimed but outside this repo — unverifiable here: " + unverifiable.join(", "));
|
|
117
|
+
if (unclaimed.length) parts.push("⚠️ changed but not claimed: " + unclaimed.join(", "));
|
|
118
|
+
if (filesOk && !unverifiable.length && !unclaimed.length && claimedFiles.length)
|
|
119
|
+
parts.push(`✅ all ${claimedFiles.length} claimed path(s) present in the working tree`);
|
|
120
|
+
filesLine = parts.join("; ");
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
// ---- check 2: tests -------------------------------------------------------
|
|
124
|
+
let testCmd = null;
|
|
125
|
+
const cfgPath = join(tasksDir, ".verify.json");
|
|
126
|
+
if (existsSync(cfgPath)) {
|
|
127
|
+
try {
|
|
128
|
+
const cfg = JSON.parse(readFileSync(cfgPath, "utf8"));
|
|
129
|
+
if (cfg.test === false) testCmd = false;
|
|
130
|
+
else if (typeof cfg.test === "string") testCmd = cfg.test;
|
|
131
|
+
} catch { /* ignore malformed config */ }
|
|
132
|
+
}
|
|
133
|
+
if (testCmd === null && existsSync(join(cwd, "package.json"))) {
|
|
134
|
+
try {
|
|
135
|
+
const pkg = JSON.parse(readFileSync(join(cwd, "package.json"), "utf8"));
|
|
136
|
+
if (pkg.scripts && pkg.scripts.test) testCmd = "npm test";
|
|
137
|
+
} catch { /* ignore */ }
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
let testsLine, testsOk = true;
|
|
141
|
+
if (testCmd === false) {
|
|
142
|
+
testsLine = "⏭️ tests skipped (disabled in tasks/.verify.json)";
|
|
143
|
+
} else if (!testCmd) {
|
|
144
|
+
testsLine = "⏭️ no test command found — test status unverified";
|
|
145
|
+
} else {
|
|
146
|
+
try {
|
|
147
|
+
execSync(testCmd, { cwd, encoding: "utf8", timeout: 120000, stdio: ["ignore", "pipe", "pipe"] });
|
|
148
|
+
testsLine = `✅ \`${testCmd}\` passed`;
|
|
149
|
+
} catch (e) {
|
|
150
|
+
testsOk = false;
|
|
151
|
+
const tail = String(e.stdout || e.stderr || "").trim().split(/\r?\n/).filter(Boolean).pop() || "";
|
|
152
|
+
testsLine = e.killed
|
|
153
|
+
? `❌ \`${testCmd}\` timed out after 120s`
|
|
154
|
+
: `❌ \`${testCmd}\` failed (exit ${e.status})${tail ? ": " + tail.slice(0, 160) : ""}`;
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
// ---- check 3: honesty scan of the diff ------------------------------------
|
|
159
|
+
let honestyLine, honestyOk = true;
|
|
160
|
+
if (!isRepo) {
|
|
161
|
+
honestyLine = "⏭️ not a git repo — diff not scanned";
|
|
162
|
+
} else {
|
|
163
|
+
let diff = "";
|
|
164
|
+
try { diff = git("diff HEAD"); } catch { try { diff = git("diff"); } catch { /* no diff */ } }
|
|
165
|
+
const pat = /\b(TODO|FIXME|XXX|HACK)\b|not implemented|placeholder/i;
|
|
166
|
+
const hits = [];
|
|
167
|
+
let file = "";
|
|
168
|
+
for (const line of diff.split(/\r?\n/)) {
|
|
169
|
+
if (line.startsWith("+++ b/")) { file = line.slice(6); continue; }
|
|
170
|
+
if (line.startsWith("+") && !line.startsWith("+++") && pat.test(line)) {
|
|
171
|
+
hits.push(`${file}: ${line.slice(1).trim().slice(0, 80)}`);
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
if (hits.length) {
|
|
175
|
+
honestyOk = false;
|
|
176
|
+
honestyLine = `❌ ${hits.length} unfinished marker(s) in diff — ` + hits.slice(0, 3).join(" | ") + (hits.length > 3 ? " …" : "");
|
|
177
|
+
} else {
|
|
178
|
+
honestyLine = "✅ no TODO/FIXME/placeholder in the diff";
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
// ---- verdict --------------------------------------------------------------
// PASS only when all three checks passed. Otherwise the severity depends on
// the task's declared status: a task marked `done` fails outright, anything
// else gets a warning.
const allOk = filesOk && testsOk && honestyOk;
const verdict = allOk
  ? "✅ PASS"
  : status === "done"
    ? "❌ FAIL — marked `done` but checks above did not pass"
    : "⚠️ WARN — open issues above (acceptable while `status: active`)";
|
|
188
|
+
|
|
189
|
+
// Assemble the Markdown "Verification" section: a timestamped heading followed
// by one bullet per check and the final verdict. The trailing empty entry
// makes the joined text end with a newline.
const stamp = new Date().toISOString();
const block = [
  `## Verification (machine-checked ${stamp})`,
  `- Files touched: ${filesLine}`,
  `- Tests: ${testsLine}`,
  `- Honesty scan: ${honestyLine}`,
  `- Verdict: ${verdict}`,
  "",
].join("\n");
|
|
196
|
+
|
|
197
|
+
// Remove a previously stamped Verification section, if present (from its
// heading up to the next "## " heading or the end of the document), then
// normalise trailing whitespace to exactly one newline before appending the
// fresh section.
const withoutOldSection = doc.replace(/\n##\s*Verification[\s\S]*?(?=\n##\s|\s*$)/, "");
doc = withoutOldSection.replace(/\s*$/, "\n");
writeFileSync(taskPath, `${doc}\n${block}`, "utf8");

// Echo the stamped section to stdout so the caller sees the verdict.
process.stdout.write(`Verification stamped into tasks/${slug}.md\n\n${block}`);
|
package/package.json
CHANGED
|
@@ -1,26 +1,27 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "@wardrail/plugin",
|
|
3
|
-
"version": "0.1.
|
|
4
|
-
"description": "Wardrail Claude Code plugin — consult your project's contract while coding (MCP) plus a checkpoint->clear->resume workflow with machine-verified checkpoints.",
|
|
5
|
-
"type": "module",
|
|
6
|
-
"files": [
|
|
7
|
-
".claude-plugin",
|
|
8
|
-
"skills",
|
|
9
|
-
"hooks",
|
|
10
|
-
".mcp.json",
|
|
11
|
-
"README.md"
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
"
|
|
19
|
-
"claude-code
|
|
20
|
-
"
|
|
21
|
-
"
|
|
22
|
-
"
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
"
|
|
26
|
-
|
|
1
|
+
{
|
|
2
|
+
"name": "@wardrail/plugin",
|
|
3
|
+
"version": "0.1.2",
|
|
4
|
+
"description": "Wardrail Claude Code plugin — consult your project's contract while coding (MCP) plus a checkpoint->clear->resume workflow with machine-verified checkpoints.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"files": [
|
|
7
|
+
".claude-plugin",
|
|
8
|
+
"skills",
|
|
9
|
+
"hooks",
|
|
10
|
+
".mcp.json",
|
|
11
|
+
"README.md",
|
|
12
|
+
"LICENSE"
|
|
13
|
+
],
|
|
14
|
+
"engines": {
|
|
15
|
+
"node": ">=20"
|
|
16
|
+
},
|
|
17
|
+
"keywords": [
|
|
18
|
+
"wardrail",
|
|
19
|
+
"claude-code",
|
|
20
|
+
"claude-code-plugin",
|
|
21
|
+
"mcp",
|
|
22
|
+
"guardrails",
|
|
23
|
+
"checkpoint"
|
|
24
|
+
],
|
|
25
|
+
"license": "UNLICENSED",
|
|
26
|
+
"private": false
|
|
27
|
+
}
|
|
@@ -1,61 +1,61 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: checkpoint
|
|
3
|
-
description: "Write the current working state into the active task file right before a /clear, so a fresh session can resume cheaply. This is the critical step in the checkpoint→clear→resume workflow — run it whenever a long session is getting expensive and the work isn't finished. Pairs with the `task` skill."
|
|
4
|
-
---
|
|
5
|
-
|
|
6
|
-
# Checkpoint
|
|
7
|
-
|
|
8
|
-
Capture everything the next session needs into `./tasks/<slug>.md`, then hand off. The
|
|
9
|
-
flow this completes is: **`/checkpoint` → `/clear` → `/task resume <slug>`**. After the
|
|
10
|
-
checkpoint, the transcript is disposable; the task file carries the state.
|
|
11
|
-
|
|
12
|
-
## Steps
|
|
13
|
-
|
|
14
|
-
1. **Find the active task.** Look in `./tasks/` for files with `status: active`.
|
|
15
|
-
- Exactly one → use it.
|
|
16
|
-
- Several → ask which one (or checkpoint the one this session was actually working
|
|
17
|
-
on, if that's unambiguous).
|
|
18
|
-
- None → offer to run `/task new <slug>` first.
|
|
19
|
-
|
|
20
|
-
2. **Update the file to reflect reality now.** Edit in place:
|
|
21
|
-
- **Objective** — sharpen only if it genuinely changed; don't churn it.
|
|
22
|
-
- **Decisions** — append choices settled this session, each with its one-line reason,
|
|
23
|
-
so the next session doesn't relitigate them.
|
|
24
|
-
- **Files touched** — add/adjust the paths changed this session with a short note.
|
|
25
|
-
- **Next step** — the single most important thing: the exact next concrete action,
|
|
26
|
-
specific enough to start cold. One action, not a list.
|
|
27
|
-
|
|
28
|
-
3. **Be honest.** Write only what is actually true right now. Do not record planned
|
|
29
|
-
work as done, do not claim a passing test you didn't run. A false checkpoint is
|
|
30
|
-
worse than none — it makes the next session build on a lie.
|
|
31
|
-
|
|
32
|
-
4. **Redirect heavy outputs.** If this session generated large tool outputs (build logs,
|
|
33
|
-
test dumps) worth keeping, note their file path in the task file and `tail` them —
|
|
34
|
-
don't paste them in. Big outputs are a top context cost.
|
|
35
|
-
|
|
36
|
-
5. **Verify against ground truth — do not self-attest.** From the project root, run:
|
|
37
|
-
`node "${CLAUDE_PLUGIN_ROOT}/hooks/verify-checkpoint.mjs" <slug>`. It reconciles the
|
|
38
|
-
file's claims against `git`, a real test run, and a diff scan, then writes a
|
|
39
|
-
`## Verification` block into the task file itself. Then:
|
|
40
|
-
- Read the verdict. If it is **not PASS**, the checkpoint is not done: either fix the
|
|
41
|
-
underlying issue and re-run, or keep `status: active` and point Next step at exactly
|
|
42
|
-
what the verdict flagged. **Never set `status: done` over a FAIL.**
|
|
43
|
-
- Do not hand-edit the `## Verification` block — it is the machine's record, not
|
|
44
|
-
yours. Surface the verdict to the user in your handoff.
|
|
45
|
-
- If verification can't apply (not a git repo, files live outside this repo, no
|
|
46
|
-
tests), say so plainly rather than implying a pass.
|
|
47
|
-
|
|
48
|
-
6. **Drop the clear-ready sentinel.** Write the active task's slug into
|
|
49
|
-
`./tasks/.clear-ready` (overwrite; create if missing). This is a transient marker the
|
|
50
|
-
`Stop` hook consumes once to back up the clear nudge — it deletes itself, so it
|
|
51
|
-
normally won't linger in the working tree.
|
|
52
|
-
|
|
53
|
-
7. **Hand off.** Confirm the path written, the verification verdict, and the Next step,
|
|
54
|
-
then tell the user it's safe to `/clear` and later run `/task resume <slug>`.
|
|
55
|
-
|
|
56
|
-
## Why manual
|
|
57
|
-
|
|
58
|
-
Auto-clearing is unsafe: judging whether work is at a clean stopping point needs
|
|
59
|
-
judgment. Two keystrokes (`/checkpoint`, then `/clear`) is the target — deliberate, not
|
|
60
|
-
automatic. This cannot beat `/compact` *within* a session; it makes the boundary
|
|
61
|
-
between sessions cheap. See [task] for the file format and resume step.
|
|
1
|
+
---
|
|
2
|
+
name: checkpoint
|
|
3
|
+
description: "Write the current working state into the active task file right before a /clear, so a fresh session can resume cheaply. This is the critical step in the checkpoint→clear→resume workflow — run it whenever a long session is getting expensive and the work isn't finished. Pairs with the `task` skill."
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Checkpoint
|
|
7
|
+
|
|
8
|
+
Capture everything the next session needs into `./tasks/<slug>.md`, then hand off. The
|
|
9
|
+
flow this completes is: **`/checkpoint` → `/clear` → `/task resume <slug>`**. After the
|
|
10
|
+
checkpoint, the transcript is disposable; the task file carries the state.
|
|
11
|
+
|
|
12
|
+
## Steps
|
|
13
|
+
|
|
14
|
+
1. **Find the active task.** Look in `./tasks/` for files with `status: active`.
|
|
15
|
+
- Exactly one → use it.
|
|
16
|
+
- Several → ask which one (or checkpoint the one this session was actually working
|
|
17
|
+
on, if that's unambiguous).
|
|
18
|
+
- None → offer to run `/task new <slug>` first.
|
|
19
|
+
|
|
20
|
+
2. **Update the file to reflect reality now.** Edit in place:
|
|
21
|
+
- **Objective** — sharpen only if it genuinely changed; don't churn it.
|
|
22
|
+
- **Decisions** — append choices settled this session, each with its one-line reason,
|
|
23
|
+
so the next session doesn't relitigate them.
|
|
24
|
+
- **Files touched** — add/adjust the paths changed this session with a short note.
|
|
25
|
+
- **Next step** — the single most important thing: the exact next concrete action,
|
|
26
|
+
specific enough to start cold. One action, not a list.
|
|
27
|
+
|
|
28
|
+
3. **Be honest.** Write only what is actually true right now. Do not record planned
|
|
29
|
+
work as done, do not claim a passing test you didn't run. A false checkpoint is
|
|
30
|
+
worse than none — it makes the next session build on a lie.
|
|
31
|
+
|
|
32
|
+
4. **Redirect heavy outputs.** If this session generated large tool outputs (build logs,
|
|
33
|
+
test dumps) worth keeping, note their file path in the task file and `tail` them —
|
|
34
|
+
don't paste them in. Big outputs are a top context cost.
|
|
35
|
+
|
|
36
|
+
5. **Verify against ground truth — do not self-attest.** From the project root, run:
|
|
37
|
+
`node "${CLAUDE_PLUGIN_ROOT}/hooks/verify-checkpoint.mjs" <slug>`. It reconciles the
|
|
38
|
+
file's claims against `git`, a real test run, and a diff scan, then writes a
|
|
39
|
+
`## Verification` block into the task file itself. Then:
|
|
40
|
+
- Read the verdict. If it is **not PASS**, the checkpoint is not done: either fix the
|
|
41
|
+
underlying issue and re-run, or keep `status: active` and point Next step at exactly
|
|
42
|
+
what the verdict flagged. **Never set `status: done` over a FAIL.**
|
|
43
|
+
- Do not hand-edit the `## Verification` block — it is the machine's record, not
|
|
44
|
+
yours. Surface the verdict to the user in your handoff.
|
|
45
|
+
- If verification can't apply (not a git repo, files live outside this repo, no
|
|
46
|
+
tests), say so plainly rather than implying a pass.
|
|
47
|
+
|
|
48
|
+
6. **Drop the clear-ready sentinel.** Write the active task's slug into
|
|
49
|
+
`./tasks/.clear-ready` (overwrite; create if missing). This is a transient marker the
|
|
50
|
+
`Stop` hook consumes once to back up the clear nudge — it deletes itself, so it
|
|
51
|
+
normally won't linger in the working tree.
|
|
52
|
+
|
|
53
|
+
7. **Hand off.** Confirm the path written, the verification verdict, and the Next step,
|
|
54
|
+
then tell the user it's safe to `/clear` and later run `/task resume <slug>`.
|
|
55
|
+
|
|
56
|
+
## Why manual
|
|
57
|
+
|
|
58
|
+
Auto-clearing is unsafe: judging whether work is at a clean stopping point needs
|
|
59
|
+
judgment. Two keystrokes (`/checkpoint`, then `/clear`) is the target — deliberate, not
|
|
60
|
+
automatic. This cannot beat `/compact` *within* a session; it makes the boundary
|
|
61
|
+
between sessions cheap. See [task] for the file format and resume step.
|
package/skills/task/SKILL.md
CHANGED
|
@@ -1,69 +1,69 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: task
|
|
3
|
-
description: "Manage long-session task files for the checkpoint→clear→resume workflow. Invoke as `/task new <slug>` to scaffold a task file, or `/task resume <slug>` to rehydrate a fresh session from one. Task files live in ./tasks/ in the current project. The point is to make /clear cheap: resume reloads a ~3k-token brief instead of carrying a 150k transcript."
|
|
4
|
-
---
|
|
5
|
-
|
|
6
|
-
# Task files
|
|
7
|
-
|
|
8
|
-
A task file at `./tasks/<slug>.md` (relative to the current project) is a small,
|
|
9
|
-
honest snapshot of in-flight work. It exists so a long session can be `/clear`-ed and
|
|
10
|
-
a fresh one rehydrated from ~3k tokens instead of the whole transcript. The savings
|
|
11
|
-
happen at the boundary between sessions, not within one.
|
|
12
|
-
|
|
13
|
-
The model is stateless and the harness re-sends the full transcript every turn — you
|
|
14
|
-
cannot make the AI "read less from the top." The only lever is a smaller window. Task
|
|
15
|
-
files are that lever: cheap rehydration after `/clear`.
|
|
16
|
-
|
|
17
|
-
## File format
|
|
18
|
-
|
|
19
|
-
```markdown
|
|
20
|
-
---
|
|
21
|
-
status: active # active | done
|
|
22
|
-
related: [] # other task slugs this depends on, e.g. [auth-rework]
|
|
23
|
-
---
|
|
24
|
-
|
|
25
|
-
# <One-line objective title>
|
|
26
|
-
|
|
27
|
-
## Objective
|
|
28
|
-
What "done" means, stated so it can be verified (a test, a command, an observable
|
|
29
|
-
behaviour). Not "make it work."
|
|
30
|
-
|
|
31
|
-
## Decisions
|
|
32
|
-
- Settled choices not to relitigate, each with the one-line reason.
|
|
33
|
-
|
|
34
|
-
## Files touched
|
|
35
|
-
- path/to/file — what changed / what it's for
|
|
36
|
-
|
|
37
|
-
## Next step
|
|
38
|
-
The single next concrete action. One thing, not a list.
|
|
39
|
-
```
|
|
40
|
-
|
|
41
|
-
## Dispatch on the argument
|
|
42
|
-
|
|
43
|
-
### `/task new <slug>`
|
|
44
|
-
|
|
45
|
-
1. Ensure `./tasks/` exists (create it if not).
|
|
46
|
-
2. If `./tasks/<slug>.md` already exists, stop and say so — do not overwrite.
|
|
47
|
-
3. Write the scaffold above. Fill **Objective** from the current conversation if there
|
|
48
|
-
is enough context to state it verifiably; otherwise leave a one-line prompt for the
|
|
49
|
-
user to complete. Leave Decisions / Files touched / Next step minimal but real —
|
|
50
|
-
never invent progress that hasn't happened.
|
|
51
|
-
4. Tell the user the path and that `/checkpoint` will keep it current before a `/clear`.
|
|
52
|
-
|
|
53
|
-
### `/task resume <slug>`
|
|
54
|
-
|
|
55
|
-
1. Read `./tasks/<slug>.md`. If `status: done`, say so and ask whether to reopen.
|
|
56
|
-
2. Read each task listed in `related:` (those files only).
|
|
57
|
-
3. Read **only** the files named under "Files touched" that you actually need for the
|
|
58
|
-
Next step — not the whole repo. CLAUDE.md and memory are already auto-loaded; do not
|
|
59
|
-
re-read them.
|
|
60
|
-
4. Give the user a 3–5 line orientation: the Objective, the key Decisions, and the
|
|
61
|
-
Next step you're about to take. Then proceed with that Next step.
|
|
62
|
-
|
|
63
|
-
Load nothing beyond the above. Pulling in extra context defeats the purpose.
|
|
64
|
-
|
|
65
|
-
## Honesty
|
|
66
|
-
|
|
67
|
-
A stale or optimistic task file is worse than none — it misleads the next session into
|
|
68
|
-
building on work that didn't happen. Everything written must be true at write time.
|
|
69
|
-
See [checkpoint] for the write-before-clear step.
|
|
1
|
+
---
|
|
2
|
+
name: task
|
|
3
|
+
description: "Manage long-session task files for the checkpoint→clear→resume workflow. Invoke as `/task new <slug>` to scaffold a task file, or `/task resume <slug>` to rehydrate a fresh session from one. Task files live in ./tasks/ in the current project. The point is to make /clear cheap: resume reloads a ~3k-token brief instead of carrying a 150k transcript."
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Task files
|
|
7
|
+
|
|
8
|
+
A task file at `./tasks/<slug>.md` (relative to the current project) is a small,
|
|
9
|
+
honest snapshot of in-flight work. It exists so a long session can be `/clear`-ed and
|
|
10
|
+
a fresh one rehydrated from ~3k tokens instead of the whole transcript. The savings
|
|
11
|
+
happen at the boundary between sessions, not within one.
|
|
12
|
+
|
|
13
|
+
The model is stateless and the harness re-sends the full transcript every turn — you
|
|
14
|
+
cannot make the AI "read less from the top." The only lever is a smaller window. Task
|
|
15
|
+
files are that lever: cheap rehydration after `/clear`.
|
|
16
|
+
|
|
17
|
+
## File format
|
|
18
|
+
|
|
19
|
+
```markdown
|
|
20
|
+
---
|
|
21
|
+
status: active # active | done
|
|
22
|
+
related: [] # other task slugs this depends on, e.g. [auth-rework]
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
# <One-line objective title>
|
|
26
|
+
|
|
27
|
+
## Objective
|
|
28
|
+
What "done" means, stated so it can be verified (a test, a command, an observable
|
|
29
|
+
behaviour). Not "make it work."
|
|
30
|
+
|
|
31
|
+
## Decisions
|
|
32
|
+
- Settled choices not to relitigate, each with the one-line reason.
|
|
33
|
+
|
|
34
|
+
## Files touched
|
|
35
|
+
- path/to/file — what changed / what it's for
|
|
36
|
+
|
|
37
|
+
## Next step
|
|
38
|
+
The single next concrete action. One thing, not a list.
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Dispatch on the argument
|
|
42
|
+
|
|
43
|
+
### `/task new <slug>`
|
|
44
|
+
|
|
45
|
+
1. Ensure `./tasks/` exists (create it if not).
|
|
46
|
+
2. If `./tasks/<slug>.md` already exists, stop and say so — do not overwrite.
|
|
47
|
+
3. Write the scaffold above. Fill **Objective** from the current conversation if there
|
|
48
|
+
is enough context to state it verifiably; otherwise leave a one-line prompt for the
|
|
49
|
+
user to complete. Leave Decisions / Files touched / Next step minimal but real —
|
|
50
|
+
never invent progress that hasn't happened.
|
|
51
|
+
4. Tell the user the path and that `/checkpoint` will keep it current before a `/clear`.
|
|
52
|
+
|
|
53
|
+
### `/task resume <slug>`
|
|
54
|
+
|
|
55
|
+
1. Read `./tasks/<slug>.md`. If `status: done`, say so and ask whether to reopen.
|
|
56
|
+
2. Read each task listed in `related:` (those files only).
|
|
57
|
+
3. Read **only** the files named under "Files touched" that you actually need for the
|
|
58
|
+
Next step — not the whole repo. CLAUDE.md and memory are already auto-loaded; do not
|
|
59
|
+
re-read them.
|
|
60
|
+
4. Give the user a 3–5 line orientation: the Objective, the key Decisions, and the
|
|
61
|
+
Next step you're about to take. Then proceed with that Next step.
|
|
62
|
+
|
|
63
|
+
Load nothing beyond the above. Pulling in extra context defeats the purpose.
|
|
64
|
+
|
|
65
|
+
## Honesty
|
|
66
|
+
|
|
67
|
+
A stale or optimistic task file is worse than none — it misleads the next session into
|
|
68
|
+
building on work that didn't happen. Everything written must be true at write time.
|
|
69
|
+
See [checkpoint] for the write-before-clear step.
|