selftune 0.1.4 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/diagnosis-analyst.md +156 -0
- package/.claude/agents/evolution-reviewer.md +180 -0
- package/.claude/agents/integration-guide.md +212 -0
- package/.claude/agents/pattern-analyst.md +160 -0
- package/CHANGELOG.md +46 -1
- package/README.md +105 -257
- package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
- package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
- package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
- package/apps/local-dashboard/dist/favicon.png +0 -0
- package/apps/local-dashboard/dist/index.html +17 -0
- package/apps/local-dashboard/dist/logo.png +0 -0
- package/apps/local-dashboard/dist/logo.svg +9 -0
- package/assets/BeforeAfter.gif +0 -0
- package/assets/FeedbackLoop.gif +0 -0
- package/assets/logo.svg +9 -0
- package/assets/skill-health-badge.svg +20 -0
- package/cli/selftune/activation-rules.ts +171 -0
- package/cli/selftune/badge/badge-data.ts +108 -0
- package/cli/selftune/badge/badge-svg.ts +212 -0
- package/cli/selftune/badge/badge.ts +99 -0
- package/cli/selftune/canonical-export.ts +183 -0
- package/cli/selftune/constants.ts +103 -1
- package/cli/selftune/contribute/bundle.ts +314 -0
- package/cli/selftune/contribute/contribute.ts +214 -0
- package/cli/selftune/contribute/sanitize.ts +162 -0
- package/cli/selftune/cron/setup.ts +266 -0
- package/cli/selftune/dashboard-contract.ts +202 -0
- package/cli/selftune/dashboard-server.ts +1049 -0
- package/cli/selftune/dashboard.ts +43 -156
- package/cli/selftune/eval/baseline.ts +248 -0
- package/cli/selftune/eval/composability-v2.ts +273 -0
- package/cli/selftune/eval/composability.ts +117 -0
- package/cli/selftune/eval/generate-unit-tests.ts +143 -0
- package/cli/selftune/eval/hooks-to-evals.ts +101 -16
- package/cli/selftune/eval/import-skillsbench.ts +221 -0
- package/cli/selftune/eval/synthetic-evals.ts +172 -0
- package/cli/selftune/eval/unit-test-cli.ts +152 -0
- package/cli/selftune/eval/unit-test.ts +196 -0
- package/cli/selftune/evolution/deploy-proposal.ts +142 -1
- package/cli/selftune/evolution/evidence.ts +26 -0
- package/cli/selftune/evolution/evolve-body.ts +586 -0
- package/cli/selftune/evolution/evolve.ts +825 -116
- package/cli/selftune/evolution/extract-patterns.ts +105 -16
- package/cli/selftune/evolution/pareto.ts +314 -0
- package/cli/selftune/evolution/propose-body.ts +171 -0
- package/cli/selftune/evolution/propose-description.ts +100 -2
- package/cli/selftune/evolution/propose-routing.ts +166 -0
- package/cli/selftune/evolution/refine-body.ts +141 -0
- package/cli/selftune/evolution/rollback.ts +21 -4
- package/cli/selftune/evolution/validate-body.ts +254 -0
- package/cli/selftune/evolution/validate-proposal.ts +257 -35
- package/cli/selftune/evolution/validate-routing.ts +177 -0
- package/cli/selftune/grading/auto-grade.ts +200 -0
- package/cli/selftune/grading/grade-session.ts +513 -42
- package/cli/selftune/grading/pre-gates.ts +104 -0
- package/cli/selftune/grading/results.ts +42 -0
- package/cli/selftune/hooks/auto-activate.ts +185 -0
- package/cli/selftune/hooks/evolution-guard.ts +165 -0
- package/cli/selftune/hooks/prompt-log.ts +172 -2
- package/cli/selftune/hooks/session-stop.ts +123 -3
- package/cli/selftune/hooks/skill-change-guard.ts +112 -0
- package/cli/selftune/hooks/skill-eval.ts +119 -3
- package/cli/selftune/index.ts +415 -48
- package/cli/selftune/ingestors/claude-replay.ts +377 -0
- package/cli/selftune/ingestors/codex-rollout.ts +345 -46
- package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
- package/cli/selftune/ingestors/openclaw-ingest.ts +573 -0
- package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
- package/cli/selftune/init.ts +376 -16
- package/cli/selftune/last.ts +14 -5
- package/cli/selftune/localdb/db.ts +63 -0
- package/cli/selftune/localdb/materialize.ts +428 -0
- package/cli/selftune/localdb/queries.ts +376 -0
- package/cli/selftune/localdb/schema.ts +204 -0
- package/cli/selftune/memory/writer.ts +447 -0
- package/cli/selftune/monitoring/watch.ts +90 -16
- package/cli/selftune/normalization.ts +682 -0
- package/cli/selftune/observability.ts +19 -44
- package/cli/selftune/orchestrate.ts +1073 -0
- package/cli/selftune/quickstart.ts +203 -0
- package/cli/selftune/repair/skill-usage.ts +576 -0
- package/cli/selftune/schedule.ts +561 -0
- package/cli/selftune/status.ts +59 -33
- package/cli/selftune/sync.ts +627 -0
- package/cli/selftune/types.ts +525 -5
- package/cli/selftune/utils/canonical-log.ts +45 -0
- package/cli/selftune/utils/frontmatter.ts +217 -0
- package/cli/selftune/utils/hooks.ts +41 -0
- package/cli/selftune/utils/html.ts +27 -0
- package/cli/selftune/utils/llm-call.ts +103 -19
- package/cli/selftune/utils/math.ts +10 -0
- package/cli/selftune/utils/query-filter.ts +139 -0
- package/cli/selftune/utils/skill-discovery.ts +340 -0
- package/cli/selftune/utils/skill-log.ts +68 -0
- package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
- package/cli/selftune/utils/transcript.ts +307 -26
- package/cli/selftune/utils/trigger-check.ts +89 -0
- package/cli/selftune/utils/tui.ts +156 -0
- package/cli/selftune/workflows/discover.ts +254 -0
- package/cli/selftune/workflows/skill-md-writer.ts +288 -0
- package/cli/selftune/workflows/workflows.ts +188 -0
- package/package.json +28 -11
- package/packages/telemetry-contract/README.md +11 -0
- package/packages/telemetry-contract/fixtures/golden.json +87 -0
- package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
- package/packages/telemetry-contract/index.ts +1 -0
- package/packages/telemetry-contract/package.json +19 -0
- package/packages/telemetry-contract/src/index.ts +2 -0
- package/packages/telemetry-contract/src/types.ts +163 -0
- package/packages/telemetry-contract/src/validators.ts +109 -0
- package/skill/SKILL.md +180 -33
- package/skill/Workflows/AutoActivation.md +145 -0
- package/skill/Workflows/Badge.md +124 -0
- package/skill/Workflows/Baseline.md +144 -0
- package/skill/Workflows/Composability.md +107 -0
- package/skill/Workflows/Contribute.md +94 -0
- package/skill/Workflows/Cron.md +132 -0
- package/skill/Workflows/Dashboard.md +214 -0
- package/skill/Workflows/Doctor.md +63 -14
- package/skill/Workflows/Evals.md +110 -18
- package/skill/Workflows/EvolutionMemory.md +154 -0
- package/skill/Workflows/Evolve.md +181 -21
- package/skill/Workflows/EvolveBody.md +159 -0
- package/skill/Workflows/Grade.md +36 -31
- package/skill/Workflows/ImportSkillsBench.md +117 -0
- package/skill/Workflows/Ingest.md +142 -21
- package/skill/Workflows/Initialize.md +91 -23
- package/skill/Workflows/Orchestrate.md +139 -0
- package/skill/Workflows/Replay.md +91 -0
- package/skill/Workflows/Rollback.md +23 -4
- package/skill/Workflows/Schedule.md +61 -0
- package/skill/Workflows/Sync.md +88 -0
- package/skill/Workflows/UnitTest.md +150 -0
- package/skill/Workflows/Watch.md +33 -1
- package/skill/Workflows/Workflows.md +129 -0
- package/skill/assets/activation-rules-default.json +26 -0
- package/skill/assets/multi-skill-settings.json +63 -0
- package/skill/assets/single-skill-settings.json +57 -0
- package/skill/references/invocation-taxonomy.md +2 -2
- package/skill/references/logs.md +164 -2
- package/skill/references/setup-patterns.md +65 -0
- package/skill/references/version-history.md +40 -0
- package/skill/settings_snippet.json +23 -0
- package/templates/activation-rules-default.json +27 -0
- package/templates/multi-skill-settings.json +64 -0
- package/templates/single-skill-settings.json +58 -0
- package/dashboard/index.html +0 -1119
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pre-gates.ts
|
|
3
|
+
*
|
|
4
|
+
* Deterministic pre-gate checks that resolve grading expectations without LLM.
|
|
5
|
+
* Each gate matches an expectation text pattern and resolves it using telemetry data.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { GradingExpectation, SessionTelemetryRecord } from "../types.js";
|
|
9
|
+
|
|
10
|
+
// ---------------------------------------------------------------------------
|
|
11
|
+
// Gate definitions
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
|
|
14
|
+
/** A deterministic check that can settle one kind of grading expectation. */
export interface PreGate {
  /** Identifier quoted in the evidence string of a resolved expectation. */
  name: string;
  /** Tested against the expectation text to decide whether this gate applies. */
  pattern: RegExp;
  /** Decides pass (`true`) or fail (`false`) from the supplied context. */
  check: (ctx: PreGateContext) => boolean;
}

/** Inputs handed to every gate's `check` callback. */
export interface PreGateContext {
  /** Telemetry record of the session being graded. */
  telemetry: SessionTelemetryRecord;
  /** Name of the skill being graded. */
  skillName: string;
  /** Optional transcript text that gates may search. */
  transcriptExcerpt?: string;
}

/** Outcome of running pre-gates over a list of expectation texts. */
export interface PreGateResult {
  /** Expectations that a gate settled. */
  resolved: GradingExpectation[];
  /** Expectation texts that no gate matched. */
  remaining: string[];
}
|
|
30
|
+
|
|
31
|
+
/**
 * Built-in gates that `runPreGates` uses when no custom list is supplied.
 * The runner stops at the first gate whose pattern matches, so order matters.
 */
export const DEFAULT_GATES: PreGate[] = [
  {
    name: "skill_md_read",
    pattern: /(read.*skill\.md|skill\.md.*read)/i,
    check: ({ telemetry, skillName, transcriptExcerpt }) => {
      // Passes when telemetry lists the skill among the triggered skills ...
      const triggeredSkills = telemetry.skills_triggered ?? [];
      if (triggeredSkills.includes(skillName)) return true;
      // ... or when the transcript excerpt mentions a read of SKILL.md.
      return transcriptExcerpt ? /Read.*SKILL\.md/i.test(transcriptExcerpt) : false;
    },
  },
  {
    name: "expected_tools_called",
    pattern: /tool[s]?\s+(were\s+)?called/i,
    check: ({ telemetry }) => {
      // A missing counter is treated as zero tool calls.
      const toolCalls = telemetry.total_tool_calls ?? 0;
      return toolCalls > 0;
    },
  },
  {
    name: "error_count",
    pattern: /error[s]?\s*(count|encountered)/i,
    check: ({ telemetry }) => {
      // Up to two recorded errors still counts as a pass.
      const errorCount = telemetry.errors_encountered ?? 0;
      return errorCount <= 2;
    },
  },
  {
    name: "session_completed",
    pattern: /session\s*(completed|finished)/i,
    check: ({ telemetry }) => {
      // At least one assistant turn is taken as evidence of completion.
      const assistantTurns = telemetry.assistant_turns ?? 0;
      return assistantTurns > 0;
    },
  },
];
|
|
61
|
+
|
|
62
|
+
// ---------------------------------------------------------------------------
|
|
63
|
+
// Pre-gate runner
|
|
64
|
+
// ---------------------------------------------------------------------------
|
|
65
|
+
|
|
66
|
+
/**
 * Settle as many expectations as possible without an LLM.
 *
 * Each expectation text is offered to the gates in order; the first gate whose
 * pattern matches decides it. Unmatched texts are returned for LLM grading.
 *
 * @param expectations - Expectation texts to classify.
 * @param ctx - Context passed unchanged to the matching gate's `check`.
 * @param gates - Gates to try, in priority order. Defaults to `DEFAULT_GATES`.
 * @returns `resolved` entries (source `"pre-gate"`, score 1.0 on pass and 0.0
 *   on fail) and the `remaining` texts that no gate matched.
 */
export function runPreGates(
  expectations: string[],
  ctx: PreGateContext,
  gates: PreGate[] = DEFAULT_GATES,
): PreGateResult {
  const outcome: PreGateResult = { resolved: [], remaining: [] };

  for (const text of expectations) {
    // `find` stops at the first match, so earlier gates take precedence.
    const winner = gates.find((candidate) => {
      const { pattern } = candidate;
      // Stateful regexes remember `lastIndex` between calls; rewind so every
      // expectation is tested from the start of the string.
      if (pattern.global || pattern.sticky) {
        pattern.lastIndex = 0;
      }
      return pattern.test(text);
    });

    if (winner === undefined) {
      outcome.remaining.push(text);
      continue;
    }

    const passed = winner.check(ctx);
    outcome.resolved.push({
      text,
      passed,
      evidence: `Pre-gate "${winner.name}": ${passed ? "PASS" : "FAIL"}`,
      score: passed ? 1.0 : 0.0,
      source: "pre-gate",
    });
  }

  return outcome;
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import { existsSync, readdirSync, readFileSync } from "node:fs";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
|
|
4
|
+
import { SELFTUNE_CONFIG_DIR } from "../constants.js";
|
|
5
|
+
import type { GradingResult } from "../types.js";
|
|
6
|
+
|
|
7
|
+
export const DEFAULT_GRADING_DIR = join(SELFTUNE_CONFIG_DIR, "grading");
|
|
8
|
+
|
|
9
|
+
/**
 * Load every grading artifact stored in `gradingDir`.
 *
 * Only files named `result-*.json` are considered. A file is skipped when it
 * cannot be read or parsed, or when `session_id` / `skill_name` is not a string.
 *
 * @param gradingDir - Directory to scan; defaults to `DEFAULT_GRADING_DIR`.
 * @returns Accepted results ordered by `graded_at`, newest first. Results whose
 *   `graded_at` is not a string are kept and sorted as if it were empty, which
 *   places them after timestamped results. Returns `[]` when the directory does
 *   not exist.
 */
export function readGradingResults(gradingDir: string = DEFAULT_GRADING_DIR): GradingResult[] {
  if (!existsSync(gradingDir)) return [];

  const results: GradingResult[] = [];

  for (const entry of readdirSync(gradingDir).sort()) {
    if (!entry.startsWith("result-") || !entry.endsWith(".json")) continue;

    try {
      const parsed = JSON.parse(
        readFileSync(join(gradingDir, entry), "utf-8"),
      ) as Partial<GradingResult>;
      if (typeof parsed?.session_id !== "string" || typeof parsed?.skill_name !== "string")
        continue;
      results.push(parsed as GradingResult);
    } catch {
      // Ignore malformed grading artifacts.
    }
  }

  // `graded_at` is not validated above, so the comparator must not assume it is
  // a string: calling localeCompare on a missing value would throw here, outside
  // the try/catch, and discard every valid result along with the bad one.
  const sortKey = (result: GradingResult): string =>
    typeof result.graded_at === "string" ? result.graded_at : "";

  return results.sort((a, b) => sortKey(b).localeCompare(sortKey(a)));
}
|
|
31
|
+
|
|
32
|
+
/**
 * Load the grading results that belong to one skill.
 *
 * Skill names are compared after trimming whitespace and lower-casing.
 *
 * @param skillName - Skill to look up; a blank name yields `[]`.
 * @param gradingDir - Directory to scan; defaults to `DEFAULT_GRADING_DIR`.
 * @returns The matching results, in the order `readGradingResults` returns them.
 */
export function readGradingResultsForSkill(
  skillName: string,
  gradingDir: string = DEFAULT_GRADING_DIR,
): GradingResult[] {
  const canonical = (name: string): string => name.trim().toLowerCase();

  const wanted = canonical(skillName);
  if (wanted === "") return [];

  const allResults = readGradingResults(gradingDir);
  return allResults.filter(({ skill_name }) => canonical(skill_name) === wanted);
}
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* Claude Code UserPromptSubmit hook: auto-activate.ts
|
|
4
|
+
*
|
|
5
|
+
* Evaluates activation rules against the current session context and
|
|
6
|
+
* outputs suggestions to stderr (shown to Claude as system messages).
|
|
7
|
+
* Suggestions are advisory — exit code is always 0.
|
|
8
|
+
*
|
|
9
|
+
* Session state is tracked to avoid repeated nags within a session.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
13
|
+
import { dirname } from "node:path";
|
|
14
|
+
import {
|
|
15
|
+
CLAUDE_SETTINGS_PATH,
|
|
16
|
+
EVOLUTION_AUDIT_LOG,
|
|
17
|
+
QUERY_LOG,
|
|
18
|
+
SELFTUNE_CONFIG_DIR,
|
|
19
|
+
sessionStatePath,
|
|
20
|
+
TELEMETRY_LOG,
|
|
21
|
+
} from "../constants.js";
|
|
22
|
+
import type {
|
|
23
|
+
ActivationContext,
|
|
24
|
+
ActivationRule,
|
|
25
|
+
PromptSubmitPayload,
|
|
26
|
+
SessionState,
|
|
27
|
+
} from "../types.js";
|
|
28
|
+
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
// Session state persistence
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
|
|
33
|
+
/**
 * Read the persisted suggestion state for a session.
 *
 * @param path - Location of the JSON state file.
 * @param sessionId - Session the caller is running in.
 * @returns The stored state when the file parses, belongs to `sessionId`, and
 *   has an array `suggestions_shown`. Otherwise a new empty state stamped with
 *   the current time.
 */
export function loadSessionState(path: string, sessionId: string): SessionState {
  const blankState = (): SessionState => ({
    session_id: sessionId,
    suggestions_shown: [],
    updated_at: new Date().toISOString(),
  });

  if (!existsSync(path)) return blankState();

  try {
    const stored = JSON.parse(readFileSync(path, "utf-8")) as SessionState;
    const sameSession = stored.session_id === sessionId;
    if (sameSession && Array.isArray(stored.suggestions_shown)) {
      return stored;
    }
  } catch {
    // Unreadable or corrupt file: fall through and start fresh.
  }

  return blankState();
}
|
|
49
|
+
|
|
50
|
+
/**
 * Write session state to disk as pretty-printed JSON (2-space indent).
 * The parent directory is created, including intermediate levels, if missing.
 *
 * @param path - Destination file.
 * @param state - State to serialize.
 */
export function saveSessionState(path: string, state: SessionState): void {
  const parentDir = dirname(path);
  const parentMissing = !existsSync(parentDir);
  if (parentMissing) {
    mkdirSync(parentDir, { recursive: true });
  }

  const serialized = JSON.stringify(state, null, 2);
  writeFileSync(path, serialized, "utf-8");
}
|
|
57
|
+
|
|
58
|
+
// ---------------------------------------------------------------------------
|
|
59
|
+
// PAI coexistence check
|
|
60
|
+
// ---------------------------------------------------------------------------
|
|
61
|
+
|
|
62
|
+
/**
 * Check if PAI's skill-activation-prompt hook is registered in settings.
 * If so, selftune defers skill-level suggestions.
 *
 * A hook counts as registered when any entry under `hooks` has a string
 * `command` containing "skill-activation-prompt", either directly on the entry
 * or on one of the entry's nested `hooks[]` items.
 *
 * Malformed entries (null, primitives, missing fields) are skipped rather than
 * aborting the scan, so one bad entry cannot hide a valid registration that
 * appears later in the file.
 *
 * @param settingsPath - Path to the settings JSON file.
 * @returns `true` when the hook is found; `false` when it is not, when the file
 *   is missing, or when the file cannot be read or parsed.
 */
export function checkPaiCoexistence(settingsPath: string): boolean {
  if (!existsSync(settingsPath)) return false;

  const PAI_HOOK_MARKER = "skill-activation-prompt";

  // True when `candidate` is an object whose `command` string names the PAI hook.
  const mentionsPaiHook = (candidate: unknown): boolean => {
    if (typeof candidate !== "object" || candidate === null) return false;
    const command = (candidate as { command?: unknown }).command;
    return typeof command === "string" && command.includes(PAI_HOOK_MARKER);
  };

  try {
    const settings: unknown = JSON.parse(readFileSync(settingsPath, "utf-8"));
    if (typeof settings !== "object" || settings === null) return false;

    const hooks = (settings as { hooks?: unknown }).hooks;
    if (typeof hooks !== "object" || hooks === null) return false;

    // Search all hook entries for skill-activation-prompt
    for (const hookEntries of Object.values(hooks)) {
      if (!Array.isArray(hookEntries)) continue;

      for (const entry of hookEntries as unknown[]) {
        // Flat form: entry.command
        if (mentionsPaiHook(entry)) return true;

        // Nested form: entry.hooks[].command
        if (typeof entry !== "object" || entry === null) continue;
        const nested = (entry as { hooks?: unknown }).hooks;
        if (Array.isArray(nested) && nested.some(mentionsPaiHook)) return true;
      }
    }
  } catch {
    // fail-open: unreadable or unparsable settings count as "PAI not present"
  }

  return false;
}
|
|
106
|
+
|
|
107
|
+
// ---------------------------------------------------------------------------
|
|
108
|
+
// Rule evaluation engine
|
|
109
|
+
// ---------------------------------------------------------------------------
|
|
110
|
+
|
|
111
|
+
/**
 * Run activation rules for the current session and collect their suggestions.
 *
 * Rules whose id is already recorded in the session state are skipped, and a
 * rule that throws is ignored. When at least one rule fires, its id is added to
 * the session state and the state is saved to `statePath`.
 *
 * @param rules - Rules to evaluate, in order.
 * @param ctx - Context passed to each rule's `evaluate`.
 * @param statePath - File holding the per-session suggestion state.
 * @returns Suggestion strings from the rules that fired, in rule order.
 */
export function evaluateRules(
  rules: ActivationRule[],
  ctx: ActivationContext,
  statePath: string,
): string[] {
  const state = loadSessionState(statePath, ctx.session_id);
  const fired: Array<{ id: string; suggestion: string }> = [];

  for (const rule of rules) {
    // A suggestion is shown at most once per session.
    const alreadyShown = state.suggestions_shown.includes(rule.id);
    if (alreadyShown) continue;

    try {
      const suggestion = rule.evaluate(ctx);
      if (suggestion === null) continue;
      fired.push({ id: rule.id, suggestion });
    } catch {
      // fail-open: a rule that throws is treated as not firing
    }
  }

  // Only touch the state file when something new was shown.
  if (fired.length > 0) {
    state.suggestions_shown.push(...fired.map((hit) => hit.id));
    state.updated_at = new Date().toISOString();
    saveSessionState(statePath, state);
  }

  return fired.map((hit) => hit.suggestion);
}
|
|
148
|
+
|
|
149
|
+
// ---------------------------------------------------------------------------
|
|
150
|
+
// stdin main (only when executed directly, not when imported)
|
|
151
|
+
// ---------------------------------------------------------------------------
|
|
152
|
+
|
|
153
|
+
// Hook entry point: reads the prompt-submit payload from stdin, evaluates the
// default activation rules, and prints any suggestions to stderr. Every failure
// is swallowed and the process always exits 0.
if (import.meta.main) {
  try {
    const rawPayload = await Bun.stdin.text();
    const payload: PromptSubmitPayload = JSON.parse(rawPayload);
    const sessionId = payload.session_id ?? "unknown";

    // Loaded lazily so importing this module does not pull in the rule set.
    const { DEFAULT_RULES } = await import("../activation-rules.js");

    const ctx: ActivationContext = {
      session_id: sessionId,
      query_log_path: QUERY_LOG,
      telemetry_log_path: TELEMETRY_LOG,
      evolution_audit_log_path: EVOLUTION_AUDIT_LOG,
      selftune_dir: SELFTUNE_CONFIG_DIR,
      settings_path: CLAUDE_SETTINGS_PATH,
    };

    // When PAI's activation hook is registered, selftune stays quiet and leaves
    // skill-level suggestions to PAI.
    const paiActive = checkPaiCoexistence(CLAUDE_SETTINGS_PATH);
    if (!paiActive) {
      const statePath = sessionStatePath(sessionId);
      const suggestions = evaluateRules(DEFAULT_RULES, ctx, statePath);

      // Suggestions go to stderr, one line each.
      for (const suggestion of suggestions) {
        process.stderr.write(`[selftune] 💡 Suggestion: ${suggestion}\n`);
      }
    }
  } catch {
    // silent — hooks must never block Claude
  }
  process.exit(0);
}
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* Claude Code PreToolUse hook: evolution-guard.ts
|
|
4
|
+
*
|
|
5
|
+
* Fires before Write/Edit tool calls. If the target is a SKILL.md file
|
|
6
|
+
* that has a deployed evolution (i.e., is under active monitoring), and
|
|
7
|
+
* no recent `selftune watch` snapshot exists, this hook BLOCKS the write
|
|
8
|
+
* with exit code 2 and a message suggesting to run watch first.
|
|
9
|
+
*
|
|
10
|
+
* Exit codes:
|
|
11
|
+
* 0 = allow (not a SKILL.md, not monitored, or watch is recent)
|
|
12
|
+
* 2 = block with message (Claude Code convention for PreToolUse hooks)
|
|
13
|
+
*
|
|
14
|
+
* Fail-open: any error → exit 0 (never block accidentally).
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
18
|
+
import { basename, dirname, join } from "node:path";
|
|
19
|
+
import { EVOLUTION_AUDIT_LOG, SELFTUNE_CONFIG_DIR } from "../constants.js";
|
|
20
|
+
import type { PreToolUsePayload } from "../types.js";
|
|
21
|
+
import { readJsonl } from "../utils/jsonl.js";
|
|
22
|
+
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
// Detection helpers (same pattern as skill-change-guard)
|
|
25
|
+
// ---------------------------------------------------------------------------
|
|
26
|
+
|
|
27
|
+
/**
 * Tell whether a tool call is a Write or Edit aimed at a SKILL.md file.
 * The file name comparison ignores case; the tool name comparison does not.
 */
function isSkillMdWrite(toolName: string, filePath: string): boolean {
  const isWriteTool = toolName === "Write" || toolName === "Edit";
  if (!isWriteTool) return false;

  const fileName = basename(filePath);
  return fileName.toUpperCase() === "SKILL.MD";
}
|
|
31
|
+
|
|
32
|
+
/**
 * Derive a skill name from a SKILL.md path: the name of the directory that
 * contains the file, or "unknown" when that name is empty.
 */
function extractSkillName(filePath: string): string {
  const parentDirName = basename(dirname(filePath));
  return parentDirName === "" ? "unknown" : parentDirName;
}
|
|
35
|
+
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
// Active monitoring check (reads audit log directly — no evolution imports)
|
|
38
|
+
// ---------------------------------------------------------------------------
|
|
39
|
+
|
|
40
|
+
/**
 * Decide whether a skill is currently under monitoring, based on the evolution
 * audit log.
 *
 * The log is read as JSONL and only entries whose `skill_name` equals
 * `skillName` are considered. The skill counts as monitored when the last such
 * entry has action "deployed"; any other last action (for example
 * "rolled_back"), or no entries at all, means it is not.
 *
 * @param skillName - Skill to look up.
 * @param auditLogPath - Path to the evolution audit JSONL file.
 */
export function checkActiveMonitoring(skillName: string, auditLogPath: string): boolean {
  const auditEntries = readJsonl<{
    skill_name?: string;
    action: string;
  }>(auditLogPath);

  // Scan in file order, remembering the action of the latest entry for this skill.
  let latestAction: string | undefined;
  for (const entry of auditEntries) {
    if (entry.skill_name === skillName) {
      latestAction = entry.action;
    }
  }

  // Stays undefined when the skill never appears, which also yields false.
  return latestAction === "deployed";
}
|
|
60
|
+
|
|
61
|
+
// ---------------------------------------------------------------------------
|
|
62
|
+
// Recent watch snapshot check (reads monitoring dir directly)
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
|
|
65
|
+
/**
 * Tell whether `<selftuneDir>/monitoring/latest-snapshot.json` is a recent
 * snapshot of the given skill.
 *
 * @param skillName - Skill the snapshot must belong to.
 * @param selftuneDir - Directory that contains the `monitoring` folder.
 * @param maxAgeHours - Largest acceptable snapshot age, in hours.
 * @returns `true` only when the file exists, parses, names `skillName`, and its
 *   `timestamp` is no older than `maxAgeHours`. Every failure yields `false`.
 */
export function hasRecentWatchSnapshot(
  skillName: string,
  selftuneDir: string,
  maxAgeHours: number,
): boolean {
  const snapshotFile = join(selftuneDir, "monitoring", "latest-snapshot.json");
  if (!existsSync(snapshotFile)) return false;

  try {
    const { skill_name: snapshotSkill, timestamp } = JSON.parse(
      readFileSync(snapshotFile, "utf-8"),
    ) as {
      timestamp: string;
      skill_name?: string;
    };

    // A snapshot of a different skill says nothing about this one.
    if (snapshotSkill !== skillName) return false;

    // An unparsable timestamp gives NaN, and NaN <= limit is false.
    const ageMs = Date.now() - new Date(timestamp).getTime();
    const limitMs = maxAgeHours * 60 * 60 * 1000;
    return ageMs <= limitMs;
  } catch {
    return false;
  }
}
|
|
94
|
+
|
|
95
|
+
// ---------------------------------------------------------------------------
|
|
96
|
+
// Guard result type
|
|
97
|
+
// ---------------------------------------------------------------------------
|
|
98
|
+
|
|
99
|
+
/** Verdict produced when the guard decides to block a tool call. */
export interface GuardResult {
  /** Process exit code to report; the guard sets this to 2 when blocking. */
  exitCode: number;
  /** Explanation for the block, written to stderr by the hook entry point. */
  message: string;
}
|
|
103
|
+
|
|
104
|
+
// ---------------------------------------------------------------------------
|
|
105
|
+
// Core processing logic
|
|
106
|
+
// ---------------------------------------------------------------------------
|
|
107
|
+
|
|
108
|
+
/** Settings that tell the evolution guard where to look and how strict to be. */
export interface GuardOptions {
  /** Path to the evolution audit JSONL file used for the monitoring check. */
  auditLogPath: string;
  /** Directory that holds the `monitoring` folder with the latest snapshot. */
  selftuneDir: string;
  /** Largest acceptable snapshot age in hours; the guard uses 24 when omitted. */
  maxSnapshotAgeHours?: number;
}
|
|
113
|
+
|
|
114
|
+
/**
 * Decide whether a PreToolUse tool call should be blocked.
 *
 * The call is blocked only when all of the following hold: it is a Write or
 * Edit of a SKILL.md file, the skill's latest audit entry is "deployed", and
 * there is no sufficiently recent watch snapshot for that skill.
 *
 * @param payload - PreToolUse payload; `tool_input.file_path` is used when it
 *   is a string, otherwise the path is treated as empty.
 * @param options - Audit log path, selftune directory, and optional snapshot
 *   age limit (24 hours when omitted).
 * @returns `null` to allow the call, or a `GuardResult` with exit code 2 and a
 *   message telling the user to run `selftune watch` first.
 */
export function processEvolutionGuard(
  payload: PreToolUsePayload,
  options: GuardOptions,
): GuardResult | null {
  const rawPath = payload.tool_input?.file_path;
  const filePath = typeof rawPath === "string" ? rawPath : "";

  // Anything other than a Write/Edit of SKILL.md is none of this guard's business.
  if (!isSkillMdWrite(payload.tool_name, filePath)) return null;

  const skillName = extractSkillName(filePath);

  // Skills without a live deployment may be edited freely.
  const monitored = checkActiveMonitoring(skillName, options.auditLogPath);
  if (!monitored) return null;

  // A fresh snapshot means the user has already looked at current health.
  const maxAgeHours =
    options.maxSnapshotAgeHours === undefined ? 24 : options.maxSnapshotAgeHours;
  const watchedRecently = hasRecentWatchSnapshot(skillName, options.selftuneDir, maxAgeHours);
  if (watchedRecently) return null;

  // Monitored and not recently watched: block.
  return {
    exitCode: 2,
    message: `[selftune] Skill "${skillName}" has a deployed evolution and is under active monitoring. Run \`selftune watch --skill ${skillName}\` before modifying SKILL.md to check current health.`,
  };
}
|
|
142
|
+
|
|
143
|
+
// ---------------------------------------------------------------------------
|
|
144
|
+
// stdin main (only when executed directly, not when imported)
|
|
145
|
+
// ---------------------------------------------------------------------------
|
|
146
|
+
|
|
147
|
+
// Hook entry point: reads the PreToolUse payload from stdin and exits 2 with a
// message on stderr when the guard blocks the call, otherwise exits 0.
if (import.meta.main) {
  try {
    const stdinText = await Bun.stdin.text();
    const payload: PreToolUsePayload = JSON.parse(stdinText);

    const guardOptions = {
      auditLogPath: EVOLUTION_AUDIT_LOG,
      selftuneDir: SELFTUNE_CONFIG_DIR,
    };
    const verdict = processEvolutionGuard(payload, guardOptions);

    if (verdict) {
      // Exit code 2 = block with message
      process.stderr.write(`${verdict.message}\n`);
      process.exit(2);
    }
  } catch {
    // Fail-open: any error → allow the write
  }
  process.exit(0);
}
|
|
@@ -8,10 +8,141 @@
|
|
|
8
8
|
* a skill — the raw material for false-negative eval entries.
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
|
-
import {
|
|
12
|
-
import
|
|
11
|
+
import { readdirSync } from "node:fs";
|
|
12
|
+
import { homedir } from "node:os";
|
|
13
|
+
import { join } from "node:path";
|
|
14
|
+
import { CANONICAL_LOG, QUERY_LOG, SIGNAL_LOG, SKIP_PREFIXES } from "../constants.js";
|
|
15
|
+
import {
|
|
16
|
+
appendCanonicalRecord,
|
|
17
|
+
buildCanonicalPrompt,
|
|
18
|
+
type CanonicalBaseInput,
|
|
19
|
+
classifyIsActionable,
|
|
20
|
+
reservePromptIdentity,
|
|
21
|
+
} from "../normalization.js";
|
|
22
|
+
import type { ImprovementSignalRecord, PromptSubmitPayload, QueryLogRecord } from "../types.js";
|
|
13
23
|
import { appendJsonl } from "../utils/jsonl.js";
|
|
14
24
|
|
|
25
|
+
// ---------------------------------------------------------------------------
|
|
26
|
+
// Installed skill name cache
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
|
|
29
|
+
// Filled on the first call to getInstalledSkillNames and reused afterwards.
let cachedSkillNames: string[] | null = null;

/**
 * List the names of the directories directly under ~/.claude/skills.
 *
 * The directory is read once per process; later calls return the same array.
 * Any error while reading yields (and caches) an empty array.
 */
export function getInstalledSkillNames(): string[] {
  if (cachedSkillNames === null) {
    let names: string[];
    try {
      const skillsRoot = join(homedir(), ".claude", "skills");
      const dirents = readdirSync(skillsRoot, { withFileTypes: true });
      names = [];
      for (const dirent of dirents) {
        if (dirent.isDirectory()) names.push(dirent.name);
      }
    } catch {
      // Missing or unreadable skills directory: behave as if none are installed.
      names = [];
    }
    cachedSkillNames = names;
  }
  return cachedSkillNames;
}
|
|
46
|
+
|
|
47
|
+
// ---------------------------------------------------------------------------
|
|
48
|
+
// Signal detection patterns
|
|
49
|
+
// ---------------------------------------------------------------------------
|
|
50
|
+
|
|
51
|
+
/** One phrase shape that signals the user wanted a particular skill used. */
interface SignalPattern {
  /** Matches the phrase and captures the mentioned skill in a named group. */
  regex: RegExp;
  /** Kind of signal recorded when `regex` matches. */
  signal_type: "correction" | "explicit_request";
  /** Name of the capture group in `regex` that holds the skill name. */
  skillGroup: string;
}

/**
 * Phrase patterns that signal a missed or requested skill. All are
 * case-insensitive and capture the skill name (word characters and hyphens)
 * in the `skill` group.
 */
const SIGNAL_PATTERNS: SignalPattern[] = [
  // "why didn't you use/run/invoke X" → correction
  // The apostrophe class accepts both the ASCII apostrophe and the typographic
  // one (U+2019), which many keyboards and editors insert automatically.
  {
    regex: /why\s+didn['\u2019]t\s+you\s+(?:use|run|invoke)\s+(?:the\s+)?(?<skill>[\w-]+)/i,
    signal_type: "correction",
    skillGroup: "skill",
  },
  // "you should have used X" → correction
  {
    regex: /you\s+should\s+have\s+used\s+(?:the\s+)?(?<skill>[\w-]+)/i,
    signal_type: "correction",
    skillGroup: "skill",
  },
  // "next time use X" → correction
  {
    regex: /next\s+time\s+use\s+(?:the\s+)?(?<skill>[\w-]+)/i,
    signal_type: "correction",
    skillGroup: "skill",
  },
  // "forgot to use X" → correction
  {
    regex: /forgot\s+to\s+use\s+(?:the\s+)?(?<skill>[\w-]+)/i,
    signal_type: "correction",
    skillGroup: "skill",
  },
  // "please use X skill" / "please use the X skill" → explicit_request
  {
    regex: /please\s+use\s+(?:the\s+)?(?<skill>[\w-]+)\s+skill/i,
    signal_type: "explicit_request",
    skillGroup: "skill",
  },
  // "use the X skill" → explicit_request (must have "the" and "skill" to avoid false positives)
  {
    regex: /\buse\s+the\s+(?<skill>[\w-]+)\s+skill/i,
    signal_type: "explicit_request",
    skillGroup: "skill",
  },
];
|
|
96
|
+
|
|
97
|
+
/** Words the patterns can capture that are never treated as skill names. */
const GENERIC_SKILL_WORDS: ReadonlySet<string> = new Set([
  "strict",
  "git",
  "the",
  "a",
  "an",
  "this",
  "that",
  "it",
  "my",
]);

/**
 * Detect whether a user query contains an improvement signal.
 * Matching is regex-only — no LLM, no network. When `installedSkills` is
 * omitted the skill list comes from `getInstalledSkillNames()`.
 *
 * Patterns are tried in declaration order; the first one whose captured name
 * is not a generic word produces the result.
 *
 * @param query - Prompt text to scan; stored verbatim on the record.
 * @param sessionId - Copied to the record's `session_id`.
 * @param installedSkills - Skill names used to restore canonical casing of the
 *   captured name.
 * @returns An unconsumed signal record, or `null` when nothing usable matched.
 */
export function detectImprovementSignal(
  query: string,
  sessionId: string,
  installedSkills?: string[],
): ImprovementSignalRecord | null {
  const skills = installedSkills ?? getInstalledSkillNames();
  const skillsLower = skills.map((s) => s.toLowerCase());

  for (const pattern of SIGNAL_PATTERNS) {
    // Hold the capture in a local so it is narrowed once and reused.
    const rawSkill = query.match(pattern.regex)?.groups?.[pattern.skillGroup];
    if (!rawSkill) continue;

    const wanted = rawSkill.toLowerCase();

    // Skip generic words that aren't skill names
    if (GENERIC_SKILL_WORDS.has(wanted)) continue;

    // Prefer the installed skill's own spelling (case-insensitive lookup);
    // a name that is not installed is reported exactly as captured.
    const idx = skillsLower.indexOf(wanted);
    const mentionedSkill = idx === -1 ? rawSkill : (skills[idx] ?? rawSkill);

    return {
      timestamp: new Date().toISOString(),
      session_id: sessionId,
      query,
      signal_type: pattern.signal_type,
      mentioned_skill: mentionedSkill,
      consumed: false,
    };
  }

  return null;
}
|
|
141
|
+
|
|
142
|
+
// ---------------------------------------------------------------------------
|
|
143
|
+
// Core prompt processing
|
|
144
|
+
// ---------------------------------------------------------------------------
|
|
145
|
+
|
|
15
146
|
/**
|
|
16
147
|
* Core processing logic, exported for testability.
|
|
17
148
|
* Returns the record that was appended, or null if skipped.
|
|
@@ -19,6 +150,9 @@ import { appendJsonl } from "../utils/jsonl.js";
|
|
|
19
150
|
export function processPrompt(
|
|
20
151
|
payload: PromptSubmitPayload,
|
|
21
152
|
logPath: string = QUERY_LOG,
|
|
153
|
+
canonicalLogPath: string = CANONICAL_LOG,
|
|
154
|
+
promptStatePath?: string,
|
|
155
|
+
signalLogPath: string = SIGNAL_LOG,
|
|
22
156
|
): QueryLogRecord | null {
|
|
23
157
|
const query = (payload.user_prompt ?? "").trim();
|
|
24
158
|
|
|
@@ -37,6 +171,42 @@ export function processPrompt(
|
|
|
37
171
|
};
|
|
38
172
|
|
|
39
173
|
appendJsonl(logPath, record);
|
|
174
|
+
|
|
175
|
+
// Emit canonical prompt record (additive)
|
|
176
|
+
const baseInput: CanonicalBaseInput = {
|
|
177
|
+
platform: "claude_code",
|
|
178
|
+
capture_mode: "hook",
|
|
179
|
+
source_session_kind: "interactive",
|
|
180
|
+
session_id: record.session_id,
|
|
181
|
+
raw_source_ref: { event_type: "UserPromptSubmit" },
|
|
182
|
+
};
|
|
183
|
+
const isActionable = classifyIsActionable(query);
|
|
184
|
+
const promptIdentity = reservePromptIdentity(
|
|
185
|
+
record.session_id,
|
|
186
|
+
isActionable,
|
|
187
|
+
promptStatePath,
|
|
188
|
+
canonicalLogPath,
|
|
189
|
+
);
|
|
190
|
+
const canonical = buildCanonicalPrompt({
|
|
191
|
+
...baseInput,
|
|
192
|
+
prompt_id: promptIdentity.prompt_id,
|
|
193
|
+
occurred_at: record.timestamp,
|
|
194
|
+
prompt_text: query,
|
|
195
|
+
prompt_index: promptIdentity.prompt_index,
|
|
196
|
+
is_actionable: isActionable,
|
|
197
|
+
});
|
|
198
|
+
appendCanonicalRecord(canonical, canonicalLogPath);
|
|
199
|
+
|
|
200
|
+
// Detect and log improvement signals (never throws)
|
|
201
|
+
try {
|
|
202
|
+
const signal = detectImprovementSignal(query, record.session_id);
|
|
203
|
+
if (signal) {
|
|
204
|
+
appendJsonl(signalLogPath, signal);
|
|
205
|
+
}
|
|
206
|
+
} catch {
|
|
207
|
+
// silent — hooks must never block Claude
|
|
208
|
+
}
|
|
209
|
+
|
|
40
210
|
return record;
|
|
41
211
|
}
|
|
42
212
|
|