xtrm-tools 0.5.10 → 0.5.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +4 -1
- package/README.md +28 -30
- package/cli/dist/index.cjs +1509 -2722
- package/cli/dist/index.cjs.map +1 -1
- package/cli/package.json +1 -1
- package/config/instructions/agents-top.md +87 -23
- package/config/instructions/claude-top.md +101 -23
- package/config/pi/extensions/beads/index.ts +3 -1
- package/config/pi/extensions/session-flow/index.ts +26 -90
- package/config/pi/extensions/xtrm-loader/index.ts +39 -2
- package/hooks/README.md +0 -14
- package/hooks/beads-gate-messages.mjs +8 -22
- package/hooks/gitnexus/gitnexus-hook.cjs +1 -1
- package/hooks/hooks.json +25 -27
- package/hooks/quality-check-env.mjs +79 -0
- package/hooks/quality-check.cjs +6 -6
- package/hooks/statusline.mjs +115 -0
- package/hooks/using-xtrm-reminder.mjs +35 -0
- package/package.json +1 -1
- package/skills/sync-docs-workspace/iteration-1/benchmark.json +293 -0
- package/skills/sync-docs-workspace/iteration-1/benchmark.md +13 -0
- package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/eval_metadata.json +27 -0
- package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/outputs/result.md +210 -0
- package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/run-1/grading.json +28 -0
- package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/run-1/timing.json +1 -0
- package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/outputs/result.md +101 -0
- package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/run-1/grading.json +28 -0
- package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/run-1/timing.json +5 -0
- package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/timing.json +5 -0
- package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/eval_metadata.json +27 -0
- package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/outputs/result.md +198 -0
- package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/run-1/grading.json +28 -0
- package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/run-1/timing.json +1 -0
- package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/outputs/result.md +94 -0
- package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/run-1/grading.json +28 -0
- package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/run-1/timing.json +1 -0
- package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/eval_metadata.json +27 -0
- package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/outputs/result.md +237 -0
- package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/run-1/grading.json +28 -0
- package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/run-1/timing.json +1 -0
- package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/outputs/result.md +134 -0
- package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/run-1/grading.json +28 -0
- package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/run-1/timing.json +1 -0
- package/skills/sync-docs-workspace/iteration-2/benchmark.json +297 -0
- package/skills/sync-docs-workspace/iteration-2/benchmark.md +13 -0
- package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/eval_metadata.json +27 -0
- package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/outputs/result.md +137 -0
- package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/run-1/grading.json +92 -0
- package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/run-1/timing.json +1 -0
- package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/outputs/result.md +134 -0
- package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/run-1/grading.json +86 -0
- package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/run-1/timing.json +1 -0
- package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/eval_metadata.json +27 -0
- package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/outputs/result.md +193 -0
- package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/run-1/grading.json +72 -0
- package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/run-1/timing.json +1 -0
- package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/outputs/result.md +211 -0
- package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/run-1/grading.json +91 -0
- package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/run-1/timing.json +5 -0
- package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/eval_metadata.json +27 -0
- package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/outputs/result.md +182 -0
- package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/run-1/grading.json +95 -0
- package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/run-1/timing.json +1 -0
- package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/outputs/result.md +222 -0
- package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/run-1/grading.json +88 -0
- package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/run-1/timing.json +5 -0
- package/skills/sync-docs-workspace/iteration-3/benchmark.json +298 -0
- package/skills/sync-docs-workspace/iteration-3/benchmark.md +13 -0
- package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/eval_metadata.json +27 -0
- package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/outputs/result.md +125 -0
- package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/run-1/grading.json +97 -0
- package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/run-1/timing.json +5 -0
- package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/outputs/result.md +144 -0
- package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/run-1/grading.json +78 -0
- package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/run-1/timing.json +5 -0
- package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/eval_metadata.json +27 -0
- package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/outputs/result.md +104 -0
- package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/run-1/grading.json +91 -0
- package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/run-1/timing.json +5 -0
- package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/outputs/result.md +79 -0
- package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/run-1/grading.json +82 -0
- package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/run-1/timing.json +5 -0
- package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/eval_metadata.json +27 -0
- package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase1_context.json +302 -0
- package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase2_drift.txt +33 -0
- package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase3_analysis.json +114 -0
- package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase4_fix.txt +118 -0
- package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase5_validate.txt +38 -0
- package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/result.md +158 -0
- package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/run-1/grading.json +95 -0
- package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/run-1/timing.json +5 -0
- package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/outputs/result.md +71 -0
- package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/run-1/grading.json +90 -0
- package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/run-1/timing.json +5 -0
- package/skills/using-xtrm/SKILL.md +84 -205
- package/config/pi/extensions/bg-process/index.ts +0 -230
- package/config/pi/extensions/bg-process/package.json +0 -16
- package/config/pi/extensions/minimal-mode/index.ts +0 -201
- package/config/pi/extensions/minimal-mode/package.json +0 -16
- package/config/pi/extensions/todo/index.ts +0 -299
- package/config/pi/extensions/todo/package.json +0 -16
- package/hooks/agent_context.py +0 -105
- package/hooks/branch-state.mjs +0 -39
- package/hooks/guard-rules.mjs +0 -118
- package/hooks/main-guard-post-push.mjs +0 -71
- package/hooks/main-guard.mjs +0 -119
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// SessionStart hook — verify quality gate environment is intact.
|
|
3
|
+
// Checks for tsc, eslint, ruff so the agent knows early if enforcement
|
|
4
|
+
// is silently degraded. Exits 0 always (informational only).
|
|
5
|
+
|
|
6
|
+
import { readFileSync, existsSync } from 'node:fs';
|
|
7
|
+
import { execSync } from 'node:child_process';
|
|
8
|
+
import path from 'node:path';
|
|
9
|
+
|
|
10
|
+
// Parse the hook payload from stdin. Any read or parse failure means there is
// nothing to check: this hook is informational and must never fail the session.
let input;
try {
  input = JSON.parse(readFileSync(0, 'utf8'));
} catch {
  process.exit(0);
}

// The payload may parse to null or a non-object (e.g. stdin "null"), and `cwd`
// may be missing or not a string. Guard both so a TypeError cannot turn this
// always-exit-0 hook into a non-zero exit.
const cwd =
  (typeof input?.cwd === 'string' && input.cwd !== '' ? input.cwd : null) ??
  process.env.CLAUDE_PROJECT_DIR ??
  process.cwd();

// Only relevant in projects that have quality gates wired
const pluginRoot = process.env.CLAUDE_PLUGIN_ROOT ?? '';
const hookPresent =
  // With an empty plugin root, path.join() would yield a path relative to the
  // process working directory, so skip the plugin probe in that case.
  (pluginRoot !== '' && existsSync(path.join(pluginRoot, 'hooks', 'quality-check.cjs'))) ||
  existsSync(path.join(cwd, '.claude', 'hooks', 'quality-check.cjs'));

if (!hookPresent) process.exit(0);
|
|
26
|
+
|
|
27
|
+
/**
 * Report whether an executable named `cmd` can be found.
 *
 * Looks on PATH first, then in `node_modules/.bin` of `cwd` and each of its
 * ancestor directories.
 *
 * @param {string} cmd - Bare executable name (e.g. 'tsc'); not a path.
 * @returns {boolean} True when the executable was found in either place.
 */
function which(cmd) {
  // `cmd` is interpolated into a shell command below, so refuse anything that
  // is not a plain executable name.
  if (typeof cmd !== 'string' || !/^[\w.-]+$/.test(cmd)) return false;

  const isWindows = process.platform === 'win32';
  // `command -v` is a POSIX shell builtin, so it works where the external
  // `which` binary is not installed. Windows uses `where` instead.
  const probe = isWindows ? `where ${cmd}` : `command -v ${cmd}`;
  try {
    execSync(probe, { stdio: 'ignore' });
    return true;
  } catch {
    // fall through to local node_modules probe
  }

  // Check node_modules/.bin/ walking up from cwd. On Windows, package managers
  // also create `<name>.cmd` shims there.
  const shimNames = isWindows ? [cmd, `${cmd}.cmd`] : [cmd];
  let dir = cwd;
  while (true) {
    const binDir = path.join(dir, 'node_modules', '.bin');
    if (shimNames.some((name) => existsSync(path.join(binDir, name)))) return true;
    const parent = path.dirname(dir);
    if (parent === dir) break; // reached the filesystem root
    dir = parent;
  }
  return false;
}
|
|
44
|
+
|
|
45
|
+
// Human-readable problems found in the quality gate environment.
const warnings = [];

// CLAUDE_PROJECT_DIR check
if (!process.env.CLAUDE_PROJECT_DIR) {
  warnings.push('CLAUDE_PROJECT_DIR is not set — quality gate may target wrong directory');
}

// TypeScript project checks
const hasTsConfig =
  existsSync(path.join(cwd, 'tsconfig.json')) ||
  existsSync(path.join(cwd, 'cli', 'tsconfig.json'));

if (hasTsConfig) {
  if (!which('tsc')) warnings.push('tsc not found — TypeScript compilation check will be skipped');
  // ESLint is only expected when the project root carries one of these configs.
  const hasEslintConfig = ['eslint.config.js', 'eslint.config.mjs', '.eslintrc.js', '.eslintrc.json', '.eslintrc.yml']
    .some((f) => existsSync(path.join(cwd, f)));
  if (hasEslintConfig && !which('eslint')) warnings.push('eslint not found — ESLint check will be skipped');
}

// Python project checks
const hasPyFiles =
  existsSync(path.join(cwd, 'pyproject.toml')) ||
  existsSync(path.join(cwd, 'setup.py')) ||
  existsSync(path.join(cwd, 'requirements.txt'));

if (hasPyFiles) {
  if (!which('ruff')) warnings.push('ruff not found — Python lint check will be skipped');
}

if (warnings.length > 0) {
  const msg = `⚠️ Quality gate environment issue(s) detected:\n${warnings.map((w) => ` • ${w}`).join('\n')}\nFix these to ensure quality gates enforce correctly.`;

  process.stdout.write(JSON.stringify({
    hookSpecificOutput: {
      // Name the event explicitly, matching the other SessionStart hook
      // (using-xtrm-reminder.mjs).
      hookEventName: 'SessionStart',
      // NOTE(review): the Claude Code hooks reference appears to name this
      // field `additionalContext` for SessionStart — confirm the host actually
      // reads `additionalSystemPrompt`.
      additionalSystemPrompt: msg,
    },
  }));
}

// Set the exit code and let the process end on its own. Calling process.exit()
// right after stdout.write() can drop output that has not been flushed yet.
process.exitCode = 0;
|
package/hooks/quality-check.cjs
CHANGED
|
@@ -447,7 +447,7 @@ class QualityChecker {
|
|
|
447
447
|
if (/\.(ts|tsx)$/.test(filePath)) {
|
|
448
448
|
return 'typescript';
|
|
449
449
|
}
|
|
450
|
-
if (/\.(js|jsx)$/.test(filePath)) {
|
|
450
|
+
if (/\.(js|jsx|cjs|mjs)$/.test(filePath)) {
|
|
451
451
|
return 'javascript';
|
|
452
452
|
}
|
|
453
453
|
return 'unknown';
|
|
@@ -537,7 +537,7 @@ class QualityChecker {
|
|
|
537
537
|
const resolved = path.resolve(dir, importPath);
|
|
538
538
|
|
|
539
539
|
// Try common extensions
|
|
540
|
-
const extensions = ['.ts', '.tsx', '.js', '.jsx'];
|
|
540
|
+
const extensions = ['.ts', '.tsx', '.js', '.jsx', '.cjs', '.mjs'];
|
|
541
541
|
for (const ext of extensions) {
|
|
542
542
|
const fullPath = resolved + ext;
|
|
543
543
|
if (require('fs').existsSync(fullPath)) {
|
|
@@ -565,8 +565,8 @@ class QualityChecker {
|
|
|
565
565
|
return;
|
|
566
566
|
}
|
|
567
567
|
|
|
568
|
-
// Skip TypeScript checking for JavaScript files in hook directories
|
|
569
|
-
if (this.filePath
|
|
568
|
+
// Skip TypeScript checking for JavaScript/CJS/MJS files in hook directories
|
|
569
|
+
if (/\.(js|cjs|mjs)$/.test(this.filePath) && this.filePath.includes('.claude/hooks/')) {
|
|
570
570
|
log.debug('Skipping TypeScript check for JavaScript hook file');
|
|
571
571
|
return;
|
|
572
572
|
}
|
|
@@ -865,7 +865,7 @@ class QualityChecker {
|
|
|
865
865
|
const debuggerRule = config._fileConfig.rules?.debugger || {};
|
|
866
866
|
if (debuggerRule.enabled !== false) {
|
|
867
867
|
lines.forEach((line, index) => {
|
|
868
|
-
if (
|
|
868
|
+
if (/^\s*debugger\s*;/.test(line)) {
|
|
869
869
|
const severity = debuggerRule.severity || 'error';
|
|
870
870
|
const message =
|
|
871
871
|
debuggerRule.message || 'Remove debugger statements before committing';
|
|
@@ -1111,7 +1111,7 @@ async function fileExists(filePath) {
|
|
|
1111
1111
|
* @returns {boolean} True if source file
|
|
1112
1112
|
*/
|
|
1113
1113
|
function isSourceFile(filePath) {
  // TypeScript and JavaScript sources, including the explicit CommonJS (.cjs)
  // and ES module (.mjs) extensions.
  const sourceExtension = /\.(ts|tsx|js|jsx|cjs|mjs)$/;
  return sourceExtension.test(filePath);
}
|
|
1116
1116
|
|
|
1117
1117
|
/**
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// statusline.mjs — Claude Code statusLine command for xt claude worktree sessions
|
|
3
|
+
// Two lines:
|
|
4
|
+
// Line 1 (plain): XTRM ⎇ <branch>
|
|
5
|
+
// Line 2 (colored): ◐ <claim title in italics> OR ○ N open
|
|
6
|
+
// State file: .xtrm/statusline-claim (written by beads-claim-sync.mjs)
|
|
7
|
+
// Results cached 5s in /tmp to avoid hammering bd on every render.
|
|
8
|
+
|
|
9
|
+
import { execSync } from 'node:child_process';
|
|
10
|
+
import { readFileSync, writeFileSync, existsSync } from 'node:fs';
|
|
11
|
+
import { join } from 'node:path';
|
|
12
|
+
import { tmpdir } from 'node:os';
|
|
13
|
+
import { createHash } from 'node:crypto';
|
|
14
|
+
|
|
15
|
+
// Directory the status line describes; also the working directory for every
// command spawned by this script.
const cwd = process.cwd();

// Short per-directory key (first 8 hex chars of the md5 of cwd) so each
// working directory gets its own cache file.
const cwdDigest = createHash('md5').update(cwd).digest('hex');
const cacheKey = cwdDigest.substring(0, 8);

// Cache location in the OS temp directory, and how long an entry stays fresh (ms).
const CACHE_FILE = join(tmpdir(), 'xtrm-sl-' + cacheKey + '.json');
const CACHE_TTL = 5_000;
|
|
19
|
+
|
|
20
|
+
/**
 * Run a shell command in `cwd` and return its trimmed stdout.
 *
 * @param {string} cmd - Command line passed to the shell.
 * @returns {string|null} Trimmed stdout, or null if the command fails or
 *   exceeds the 2 second timeout.
 */
function run(cmd) {
  let output = null;
  try {
    const options = {
      encoding: 'utf8',
      cwd,
      // Pipe all three streams so the child's stderr does not leak into the
      // status line.
      stdio: ['pipe', 'pipe', 'pipe'],
      timeout: 2000,
    };
    output = execSync(cmd, options).trim();
  } catch {
    output = null;
  }
  return output;
}
|
|
29
|
+
|
|
30
|
+
/**
 * Read the cached status data if it is still fresh.
 *
 * @returns {*} The cached `data` value when the entry is younger than
 *   CACHE_TTL milliseconds; otherwise null (missing, unreadable, malformed or
 *   stale cache).
 */
function getCached() {
  try {
    const { ts, data } = JSON.parse(readFileSync(CACHE_FILE, 'utf8'));
    const ageMs = Date.now() - ts;
    if (ageMs < CACHE_TTL) return data;
  } catch {
    // A missing or corrupt cache file is treated the same as a cache miss.
  }
  return null;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Store `data` in the cache file together with the current timestamp.
 * Best effort: serialization and write failures are ignored.
 *
 * @param {*} data - JSON-serializable status data.
 */
function setCache(data) {
  const stampedAt = Date.now();
  try {
    const serialized = JSON.stringify({ ts: stampedAt, data });
    writeFileSync(CACHE_FILE, serialized);
  } catch {
    // The cache is an optimization only; a failed write just means the next
    // render recomputes.
  }
}
|
|
41
|
+
|
|
42
|
+
// ANSI SGR escape sequences used to style the status line.
const R = '\u001b[0m'; // reset all attributes
const BOLD = '\u001b[1m';
const BOLD_OFF = '\u001b[22m';
const ITALIC = '\u001b[3m';
const ITALIC_OFF = '\u001b[23m';
// 256-colour foregrounds (38;5;N)
const FG_WHITE = '\u001b[38;5;15m';
const FG_ACCENT = '\u001b[38;5;75m';
const FG_MUTED = '\u001b[38;5;245m';
// 256-colour backgrounds (48;5;N) for the claimed / idle second line
const BG_CLAIMED = '\u001b[48;5;17m';
const BG_IDLE = '\u001b[48;5;238m';
|
|
53
|
+
|
|
54
|
+
// Data: reuse the cached result when fresh, otherwise query git and bd and
// refresh the cache.
let data = getCached();
if (!data) {
  const branch = run('git branch --show-current');
  let claimTitle = null;
  let openCount = 0;

  const hasBeads = existsSync(join(cwd, '.beads'));
  if (hasBeads) {
    const claimFile = join(cwd, '.xtrm', 'statusline-claim');
    let claimId = null;
    try {
      // Read directly instead of existsSync() + read: the file can disappear
      // between the two calls, and a missing file simply means "no claim".
      claimId = readFileSync(claimFile, 'utf8').trim() || null;
    } catch {
      claimId = null;
    }

    // The id comes from a file on disk and is interpolated into a shell
    // command below. Accept only plain identifier characters, and no leading
    // '-', so it can neither inject shell syntax nor pose as an option.
    if (claimId && !/^[A-Za-z0-9][\w.-]*$/.test(claimId)) {
      claimId = null;
    }

    if (claimId) {
      const raw = run(`bd show ${claimId} --json`);
      if (raw) {
        try {
          // The title is read from the first element of the parsed output.
          claimTitle = JSON.parse(raw)?.[0]?.title ?? null;
        } catch {
          // Unparseable bd output: fall back to the open-issue count below.
        }
      }
    }

    if (!claimTitle) {
      // Pull the count out of a "(N open" fragment in the `bd list` output.
      const listOut = run('bd list');
      const m = listOut?.match(/\((\d+)\s+open/);
      if (m) openCount = parseInt(m[1], 10);
    }
  }

  data = { branch, claimTitle, openCount };
  setCache(data);
}
|
|
89
|
+
|
|
90
|
+
// Render
const { branch, claimTitle, openCount } = data;
// Fall back to 80 columns when stdout does not report a width.
const cols = process.stdout.columns || 80;

// Line 1: bold accent-coloured brand, then the muted branch name when known.
const brand = [BOLD, FG_ACCENT, 'XTRM', BOLD_OFF, R].join('');
let branchStr = '';
if (branch) {
  branchStr = FG_MUTED + '⎇ ' + branch + R;
}
const line1 = branchStr ? `${brand} ${branchStr}` : brand;
|
|
97
|
+
|
|
98
|
+
/**
 * Wrap `text` in a background colour and pad it with spaces to `cols` wide.
 *
 * @param {string} text - Content; may contain ANSI SGR sequences.
 * @param {string} bg - ANSI background escape sequence to apply.
 * @returns {string} bg + white foreground + text + padding + reset.
 */
function padded(text, bg) {
  // SGR sequences take no columns, so measure the text without them.
  const visibleWidth = text.replace(/\x1b\[[0-9;]*m/g, '').length;
  const fill = visibleWidth < cols ? ' '.repeat(cols - visibleWidth) : '';
  return bg + FG_WHITE + text + fill + R;
}
|
|
103
|
+
|
|
104
|
+
// Line 2: the claimed issue title (italic, on the "claimed" background), or an
// open-issue summary on the "idle" background.
let line2;
if (claimTitle) {
  // Collapse control characters (newlines, tabs, escapes) so the title cannot
  // break the two-line layout or inject terminal sequences.
  const cleanTitle = String(claimTitle).replace(/[\x00-\x1f\x7f]+/g, ' ').trim();
  // Keep 4 columns for the marker prefix and margin. Clamp to at least 1 so a
  // very narrow terminal cannot produce a negative slice() end.
  const maxLen = Math.max(1, cols - 4);
  const title = cleanTitle.length > maxLen
    ? `${cleanTitle.slice(0, maxLen - 1)}\u2026`
    : cleanTitle;
  line2 = padded(` \u25d0 ${ITALIC}${title}${ITALIC_OFF}`, BG_CLAIMED);
} else {
  const idle = openCount > 0 ? `\u25cb ${openCount} open` : '\u25cb no open issues';
  line2 = padded(` ${idle}`, BG_IDLE);
}

process.stdout.write(line1 + '\n' + line2 + '\n');
// Set the exit code and let the process end on its own. Calling process.exit()
// right after stdout.write() can drop output that has not been flushed yet.
process.exitCode = 0;
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// using-xtrm-reminder.mjs — Claude Code SessionStart hook
|
|
3
|
+
// Reads skills/using-xtrm/SKILL.md and injects it as additionalSystemPrompt
|
|
4
|
+
// so the agent starts every session already oriented on the xtrm workflow.
|
|
5
|
+
// Exit 0 in all paths (fail open).
|
|
6
|
+
|
|
7
|
+
import { readFileSync } from 'node:fs';
|
|
8
|
+
import { join } from 'node:path';
|
|
9
|
+
|
|
10
|
+
// The hook payload is parsed only to confirm stdin carries valid JSON; if it
// does not, exit quietly (fail open).
let input;
try {
  const rawPayload = readFileSync(0, 'utf8');
  input = JSON.parse(rawPayload);
} catch {
  process.exit(0);
}

// Without a plugin root there is no skill file to locate.
const pluginRoot = process.env.CLAUDE_PLUGIN_ROOT;
if (!pluginRoot) {
  process.exit(0);
}

const skillPath = join(pluginRoot, 'skills', 'using-xtrm', 'SKILL.md');

// A missing or unreadable skill file is not an error for the session.
let content;
try {
  content = readFileSync(skillPath, 'utf8');
} catch {
  process.exit(0);
}
|
|
23
|
+
|
|
24
|
+
/**
 * Remove a leading YAML frontmatter block from a markdown document.
 *
 * The opening and closing `---` fences must each sit on their own line. This
 * stops a `---` inside a frontmatter value from ending the block early, and
 * stops a CRLF file from being stripped up to a later horizontal rule. A
 * leading BOM and CRLF line endings are tolerated.
 *
 * @param {string} text - Full file content.
 * @returns {string} Trimmed content without the frontmatter; the trimmed
 *   input when no frontmatter block is present.
 */
function stripFrontmatter(text) {
  return text
    .replace(/^\uFEFF?---[ \t]*\r?\n[\s\S]*?\r?\n---[ \t]*(?:\r?\n|$)/, '')
    .trim();
}

// Strip YAML frontmatter (--- ... ---\n)
content = stripFrontmatter(content);

process.stdout.write(
  JSON.stringify({
    hookSpecificOutput: {
      hookEventName: 'SessionStart',
      // NOTE(review): the Claude Code hooks reference appears to name this
      // field `additionalContext` for SessionStart — confirm the host actually
      // reads `additionalSystemPrompt`.
      additionalSystemPrompt: content,
    },
  }) + '\n',
);
// Set the exit code and let the process end on its own. Calling process.exit()
// right after stdout.write() can drop output that has not been flushed yet.
process.exitCode = 0;
|
package/package.json
CHANGED
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
{
|
|
2
|
+
"metadata": {
|
|
3
|
+
"skill_name": "sync-docs",
|
|
4
|
+
"skill_path": "<path/to/skill>",
|
|
5
|
+
"executor_model": "<model-name>",
|
|
6
|
+
"analyzer_model": "<model-name>",
|
|
7
|
+
"timestamp": "2026-03-18T07:43:29Z",
|
|
8
|
+
"evals_run": [
|
|
9
|
+
1,
|
|
10
|
+
2,
|
|
11
|
+
3
|
|
12
|
+
],
|
|
13
|
+
"runs_per_configuration": 3
|
|
14
|
+
},
|
|
15
|
+
"runs": [
|
|
16
|
+
{
|
|
17
|
+
"eval_id": 3,
|
|
18
|
+
"configuration": "with_skill",
|
|
19
|
+
"run_number": 1,
|
|
20
|
+
"result": {
|
|
21
|
+
"pass_rate": 0.75,
|
|
22
|
+
"passed": 3,
|
|
23
|
+
"failed": 1,
|
|
24
|
+
"total": 4,
|
|
25
|
+
"time_seconds": 0.0,
|
|
26
|
+
"tokens": 0,
|
|
27
|
+
"tool_calls": 0,
|
|
28
|
+
"errors": 0
|
|
29
|
+
},
|
|
30
|
+
"expectations": [
|
|
31
|
+
{
|
|
32
|
+
"text": "Ran doc_structure_analyzer.py and referenced its structured output",
|
|
33
|
+
"passed": true,
|
|
34
|
+
"evidence": "Ran doc_structure_analyzer.py, quoted its full structured output including EXTRACTABLE status, extraction candidates list, MISSING files, and INVALID_SCHEMA count."
|
|
35
|
+
},
|
|
36
|
+
{
|
|
37
|
+
"text": "Named specific README sections with their suggested docs/ destination",
|
|
38
|
+
"passed": true,
|
|
39
|
+
"evidence": "Named: '## Policy System \u2192 docs/policies.md', '## MCP Servers \u2192 docs/mcp-servers.md', pi-extensions.md, plus context about CHANGELOG 6-day gap."
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
"text": "Report is actionable \u2014 tells user exactly what to do next, not just observations",
|
|
43
|
+
"passed": true,
|
|
44
|
+
"evidence": "Report includes structured phase output, specific file names, notes CHANGELOG gap with exact dates, and references the 6-day staleness."
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
"text": "Did not edit or create any files (audit only)",
|
|
48
|
+
"passed": false,
|
|
49
|
+
"evidence": "Agent ran --fix (created docs/pi-extensions.md, docs/mcp-servers.md, docs/policies.md) despite task being audit-only. Skill instructions for Phase 3 show the --fix command without making clear it is only for execute mode."
|
|
50
|
+
}
|
|
51
|
+
],
|
|
52
|
+
"notes": []
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
"eval_id": 2,
|
|
56
|
+
"configuration": "with_skill",
|
|
57
|
+
"run_number": 1,
|
|
58
|
+
"result": {
|
|
59
|
+
"pass_rate": 0.75,
|
|
60
|
+
"passed": 3,
|
|
61
|
+
"failed": 1,
|
|
62
|
+
"total": 4,
|
|
63
|
+
"time_seconds": 0.0,
|
|
64
|
+
"tokens": 0,
|
|
65
|
+
"tool_calls": 0,
|
|
66
|
+
"errors": 0
|
|
67
|
+
},
|
|
68
|
+
"expectations": [
|
|
69
|
+
{
|
|
70
|
+
"text": "Ran doc_structure_analyzer.py with --fix flag",
|
|
71
|
+
"passed": true,
|
|
72
|
+
"evidence": "Ran `python3 skills/sync-docs/scripts/doc_structure_analyzer.py --fix --bd-remember` and included full output"
|
|
73
|
+
},
|
|
74
|
+
{
|
|
75
|
+
"text": "Ran with --bd-remember or manually ran bd remember with a summary",
|
|
76
|
+
"passed": true,
|
|
77
|
+
"evidence": "bd remember stored with key 'sync-docs-fix-2026-03-18', confirmed stored:true in output JSON"
|
|
78
|
+
},
|
|
79
|
+
{
|
|
80
|
+
"text": "At least one scaffold file was created in docs/",
|
|
81
|
+
"passed": true,
|
|
82
|
+
"evidence": "Created docs/pi-extensions.md, docs/mcp-servers.md, docs/policies.md with valid frontmatter"
|
|
83
|
+
},
|
|
84
|
+
{
|
|
85
|
+
"text": "Ran validate_doc.py on created files to confirm schema",
|
|
86
|
+
"passed": false,
|
|
87
|
+
"evidence": "Report notes 7 INVALID_SCHEMA files exist but does not show validate_doc.py being run explicitly to confirm the 3 new files pass. Only the JSON output showing valid frontmatter is evidence."
|
|
88
|
+
}
|
|
89
|
+
],
|
|
90
|
+
"notes": []
|
|
91
|
+
},
|
|
92
|
+
{
|
|
93
|
+
"eval_id": 1,
|
|
94
|
+
"configuration": "with_skill",
|
|
95
|
+
"run_number": 1,
|
|
96
|
+
"result": {
|
|
97
|
+
"pass_rate": 1.0,
|
|
98
|
+
"passed": 4,
|
|
99
|
+
"failed": 0,
|
|
100
|
+
"total": 4,
|
|
101
|
+
"time_seconds": 0.0,
|
|
102
|
+
"tokens": 0,
|
|
103
|
+
"tool_calls": 0,
|
|
104
|
+
"errors": 0
|
|
105
|
+
},
|
|
106
|
+
"expectations": [
|
|
107
|
+
{
|
|
108
|
+
"text": "Ran context_gatherer.py and reported bd closed issues or merged PRs from the output",
|
|
109
|
+
"passed": true,
|
|
110
|
+
"evidence": "Ran context_gatherer.py, reported 20 bd closed issues with IDs and titles, 3 merged PRs with SHAs and dates, 15 recent commits"
|
|
111
|
+
},
|
|
112
|
+
{
|
|
113
|
+
"text": "Ran doc_structure_analyzer.py and used its output to identify doc issues",
|
|
114
|
+
"passed": true,
|
|
115
|
+
"evidence": "Ran doc_structure_analyzer.py, referenced MISSING status for docs/pi-extensions.md, hooks.md, mcp-servers.md, policies.md, skills.md and EXTRACTABLE for README"
|
|
116
|
+
},
|
|
117
|
+
{
|
|
118
|
+
"text": "Produced at least one concrete recommendation or action (not just a vague summary)",
|
|
119
|
+
"passed": true,
|
|
120
|
+
"evidence": "Named specific files: docs/pi-extensions.md, docs/hooks.md, docs/mcp-servers.md, docs/policies.md with explicit next steps for each"
|
|
121
|
+
},
|
|
122
|
+
{
|
|
123
|
+
"text": "Used the skill scripts rather than just reading files manually",
|
|
124
|
+
"passed": true,
|
|
125
|
+
"evidence": "Ran 3 scripts (context_gatherer.py, drift_detector.py, doc_structure_analyzer.py) with explicit output included in report"
|
|
126
|
+
}
|
|
127
|
+
],
|
|
128
|
+
"notes": []
|
|
129
|
+
},
|
|
130
|
+
{
|
|
131
|
+
"eval_id": 3,
|
|
132
|
+
"configuration": "without_skill",
|
|
133
|
+
"run_number": 1,
|
|
134
|
+
"result": {
|
|
135
|
+
"pass_rate": 0.75,
|
|
136
|
+
"passed": 3,
|
|
137
|
+
"failed": 1,
|
|
138
|
+
"total": 4,
|
|
139
|
+
"time_seconds": 72.5,
|
|
140
|
+
"tokens": 21934,
|
|
141
|
+
"tool_calls": 0,
|
|
142
|
+
"errors": 0
|
|
143
|
+
},
|
|
144
|
+
"expectations": [
|
|
145
|
+
{
|
|
146
|
+
"text": "Ran doc_structure_analyzer.py and referenced its structured output",
|
|
147
|
+
"passed": false,
|
|
148
|
+
"evidence": "Did not run doc_structure_analyzer.py. All findings came from manual README.md reads with line numbers."
|
|
149
|
+
},
|
|
150
|
+
{
|
|
151
|
+
"text": "Named specific README sections with their suggested docs/ destination",
|
|
152
|
+
"passed": true,
|
|
153
|
+
"evidence": "Named 6 specific sections with line numbers: Hooks Reference (114-141)\u2192docs/hooks.md, Policy System (66-87)\u2192new docs/policies.md, MCP Servers (143-158)\u2192docs/mcp.md, CLI Commands (89-111)\u2192XTRM-GUIDE.md, Version History (179-188)\u2192remove, Plugin Structure (52-63)\u2192borderline."
|
|
154
|
+
},
|
|
155
|
+
{
|
|
156
|
+
"text": "Report is actionable \u2014 tells user exactly what to do next, not just observations",
|
|
157
|
+
"passed": true,
|
|
158
|
+
"evidence": "Each section has a specific Recommendation: block with exact action (Remove section, Add single link, Create docs/policies.md, etc.). Estimated README would shrink from 193 to 60-70 lines."
|
|
159
|
+
},
|
|
160
|
+
{
|
|
161
|
+
"text": "Did not edit or create any files (audit only)",
|
|
162
|
+
"passed": true,
|
|
163
|
+
"evidence": "Report explicitly states no files were modified. Audit-only as instructed."
|
|
164
|
+
}
|
|
165
|
+
],
|
|
166
|
+
"notes": []
|
|
167
|
+
},
|
|
168
|
+
{
|
|
169
|
+
"eval_id": 2,
|
|
170
|
+
"configuration": "without_skill",
|
|
171
|
+
"run_number": 1,
|
|
172
|
+
"result": {
|
|
173
|
+
"pass_rate": 1.0,
|
|
174
|
+
"passed": 4,
|
|
175
|
+
"failed": 0,
|
|
176
|
+
"total": 4,
|
|
177
|
+
"time_seconds": 0.0,
|
|
178
|
+
"tokens": 0,
|
|
179
|
+
"tool_calls": 0,
|
|
180
|
+
"errors": 0
|
|
181
|
+
},
|
|
182
|
+
"expectations": [
|
|
183
|
+
{
|
|
184
|
+
"text": "Ran doc_structure_analyzer.py with --fix flag",
|
|
185
|
+
"passed": true,
|
|
186
|
+
"evidence": "Agent found the skill in the repo and ran doc_structure_analyzer.py --fix. However, found no MISSING gaps because with_skill run had already created those files (confounded test)."
|
|
187
|
+
},
|
|
188
|
+
{
|
|
189
|
+
"text": "Ran with --bd-remember or manually ran bd remember with a summary",
|
|
190
|
+
"passed": true,
|
|
191
|
+
"evidence": "Agent ran bd remember with key 'sync-docs-fix-schema-2026-03-18' summarizing the frontmatter additions made to 7 files."
|
|
192
|
+
},
|
|
193
|
+
{
|
|
194
|
+
"text": "At least one scaffold file was created in docs/",
|
|
195
|
+
"passed": true,
|
|
196
|
+
"evidence": "Added YAML frontmatter to 7 existing docs/ files (hooks.md, mcp.md, pre-install-cleanup.md, project-skills.md, skills.md, testing.md, todo.md). Different action than creating scaffolds but valid given scaffolds already existed."
|
|
197
|
+
},
|
|
198
|
+
{
|
|
199
|
+
"text": "Ran validate_doc.py on created files to confirm schema",
|
|
200
|
+
"passed": true,
|
|
201
|
+
"evidence": "Ran validate_doc.py docs/ \u2014 7/7 files passed after frontmatter additions."
|
|
202
|
+
}
|
|
203
|
+
],
|
|
204
|
+
"notes": []
|
|
205
|
+
},
|
|
206
|
+
{
|
|
207
|
+
"eval_id": 1,
|
|
208
|
+
"configuration": "without_skill",
|
|
209
|
+
"run_number": 1,
|
|
210
|
+
"result": {
|
|
211
|
+
"pass_rate": 0.25,
|
|
212
|
+
"passed": 1,
|
|
213
|
+
"failed": 3,
|
|
214
|
+
"total": 4,
|
|
215
|
+
"time_seconds": 0.0,
|
|
216
|
+
"tokens": 0,
|
|
217
|
+
"tool_calls": 0,
|
|
218
|
+
"errors": 0
|
|
219
|
+
},
|
|
220
|
+
"expectations": [
|
|
221
|
+
{
|
|
222
|
+
"text": "Ran context_gatherer.py and reported bd closed issues or merged PRs from the output",
|
|
223
|
+
"passed": false,
|
|
224
|
+
"evidence": "Did not run context_gatherer.py. Used git log manually. Reported 'No .beads/ DB was found' which is wrong \u2014 .beads/ exists. Missed all 20 closed bd issues."
|
|
225
|
+
},
|
|
226
|
+
{
|
|
227
|
+
"text": "Ran doc_structure_analyzer.py and used its output to identify doc issues",
|
|
228
|
+
"passed": false,
|
|
229
|
+
"evidence": "Did not run doc_structure_analyzer.py. Manually read README.md, package.json, and CHANGELOG.md."
|
|
230
|
+
},
|
|
231
|
+
{
|
|
232
|
+
"text": "Produced at least one concrete recommendation or action (not just a vague summary)",
|
|
233
|
+
"passed": true,
|
|
234
|
+
"evidence": "Found version mismatch (2.3.0 vs 2.4.1 in package.json), identified 7 undocumented branch commits in CHANGELOG, named specific line references."
|
|
235
|
+
},
|
|
236
|
+
{
|
|
237
|
+
"text": "Used the skill scripts rather than just reading files manually",
|
|
238
|
+
"passed": false,
|
|
239
|
+
"evidence": "No skill scripts were used. All findings came from manual git log, file reads, and README inspection."
|
|
240
|
+
}
|
|
241
|
+
],
|
|
242
|
+
"notes": []
|
|
243
|
+
}
|
|
244
|
+
],
|
|
245
|
+
"run_summary": {
|
|
246
|
+
"with_skill": {
|
|
247
|
+
"pass_rate": {
|
|
248
|
+
"mean": 0.8333,
|
|
249
|
+
"stddev": 0.1443,
|
|
250
|
+
"min": 0.75,
|
|
251
|
+
"max": 1.0
|
|
252
|
+
},
|
|
253
|
+
"time_seconds": {
|
|
254
|
+
"mean": 0.0,
|
|
255
|
+
"stddev": 0.0,
|
|
256
|
+
"min": 0.0,
|
|
257
|
+
"max": 0.0
|
|
258
|
+
},
|
|
259
|
+
"tokens": {
|
|
260
|
+
"mean": 0.0,
|
|
261
|
+
"stddev": 0.0,
|
|
262
|
+
"min": 0,
|
|
263
|
+
"max": 0
|
|
264
|
+
}
|
|
265
|
+
},
|
|
266
|
+
"without_skill": {
|
|
267
|
+
"pass_rate": {
|
|
268
|
+
"mean": 0.6667,
|
|
269
|
+
"stddev": 0.3819,
|
|
270
|
+
"min": 0.25,
|
|
271
|
+
"max": 1.0
|
|
272
|
+
},
|
|
273
|
+
"time_seconds": {
|
|
274
|
+
"mean": 24.1667,
|
|
275
|
+
"stddev": 41.8579,
|
|
276
|
+
"min": 0.0,
|
|
277
|
+
"max": 72.5
|
|
278
|
+
},
|
|
279
|
+
"tokens": {
|
|
280
|
+
"mean": 7311.3333,
|
|
281
|
+
"stddev": 12663.6008,
|
|
282
|
+
"min": 0,
|
|
283
|
+
"max": 21934
|
|
284
|
+
}
|
|
285
|
+
},
|
|
286
|
+
"delta": {
|
|
287
|
+
"pass_rate": "+0.17",
|
|
288
|
+
"time_seconds": "-24.2",
|
|
289
|
+
"tokens": "-7311"
|
|
290
|
+
}
|
|
291
|
+
},
|
|
292
|
+
"notes": []
|
|
293
|
+
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Skill Benchmark: sync-docs
|
|
2
|
+
|
|
3
|
+
**Model**: <model-name>
|
|
4
|
+
**Date**: 2026-03-18T07:43:29Z
|
|
5
|
+
**Evals**: 1, 2, 3 (3 runs each per configuration)
|
|
6
|
+
|
|
7
|
+
## Summary
|
|
8
|
+
|
|
9
|
+
| Metric | With Skill | Without Skill | Delta |
|
|
10
|
+
|--------|------------|---------------|-------|
|
|
11
|
+
| Pass Rate | 83% ± 14% | 67% ± 38% | +0.17 |
|
|
12
|
+
| Time | 0.0s ± 0.0s | 24.2s ± 41.9s | -24.2s |
|
|
13
|
+
| Tokens | 0 ± 0 | 7311 ± 12664 | -7311 |
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
{
|
|
2
|
+
"eval_id": 3,
|
|
3
|
+
"eval_name": "doc-audit",
|
|
4
|
+
"prompt": "Do a doc audit. I think the README has sections that should be in docs/ but I'm not sure which ones.",
|
|
5
|
+
"assertions": [
|
|
6
|
+
{
|
|
7
|
+
"id": "ran-analyzer",
|
|
8
|
+
"description": "Ran doc_structure_analyzer.py and referenced its structured output",
|
|
9
|
+
"check": "result.md cites the analyzer output (EXTRACTABLE, BLOATED, line count, or specific section names from the report)"
|
|
10
|
+
},
|
|
11
|
+
{
|
|
12
|
+
"id": "named-specific-sections",
|
|
13
|
+
"description": "Named specific README sections with their suggested docs/ destination",
|
|
14
|
+
"check": "result.md lists at least 2 specific sections (e.g. '## Policy System → docs/policies.md') not just generic advice"
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"id": "actionable-report",
|
|
18
|
+
"description": "Report is actionable — tells user exactly what to do next, not just observations",
|
|
19
|
+
"check": "result.md includes a prioritized list or clear next steps, not just 'the README could be shorter'"
|
|
20
|
+
},
|
|
21
|
+
{
|
|
22
|
+
"id": "no-edits-made",
|
|
23
|
+
"description": "Did not edit or create any files (audit only)",
|
|
24
|
+
"check": "result.md does not claim to have modified README.md or created docs/ files"
|
|
25
|
+
}
|
|
26
|
+
]
|
|
27
|
+
}
|