selftune 0.2.13 → 0.2.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +2 -0
- package/apps/local-dashboard/dist/assets/index-DOu3iLD9.js +16 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-DIwlrGlb.js +12 -0
- package/apps/local-dashboard/dist/index.html +3 -3
- package/cli/selftune/activation-rules.ts +24 -48
- package/cli/selftune/analytics.ts +13 -11
- package/cli/selftune/badge/badge.ts +13 -9
- package/cli/selftune/canonical-export.ts +6 -6
- package/cli/selftune/constants.ts +7 -0
- package/cli/selftune/contribute/bundle.ts +9 -44
- package/cli/selftune/contribute/contribute.ts +2 -1
- package/cli/selftune/cron/setup.ts +3 -1
- package/cli/selftune/dashboard-contract.ts +22 -0
- package/cli/selftune/dashboard.ts +10 -5
- package/cli/selftune/eval/baseline.ts +20 -30
- package/cli/selftune/eval/hooks-to-evals.ts +27 -34
- package/cli/selftune/eval/import-skillsbench.ts +21 -8
- package/cli/selftune/eval/unit-test-cli.ts +22 -11
- package/cli/selftune/evolution/description-quality.ts +224 -0
- package/cli/selftune/evolution/evolve-body.ts +17 -10
- package/cli/selftune/evolution/evolve.ts +70 -57
- package/cli/selftune/evolution/rollback.ts +7 -6
- package/cli/selftune/grading/auto-grade.ts +27 -35
- package/cli/selftune/grading/grade-session.ts +24 -30
- package/cli/selftune/hooks/auto-activate.ts +12 -3
- package/cli/selftune/hooks/evolution-guard.ts +14 -24
- package/cli/selftune/hooks/prompt-log.ts +7 -9
- package/cli/selftune/hooks/session-stop.ts +0 -8
- package/cli/selftune/index.ts +66 -69
- package/cli/selftune/ingestors/claude-replay.ts +29 -14
- package/cli/selftune/ingestors/codex-rollout.ts +15 -5
- package/cli/selftune/ingestors/codex-wrapper.ts +15 -13
- package/cli/selftune/ingestors/openclaw-ingest.ts +24 -5
- package/cli/selftune/ingestors/opencode-ingest.ts +9 -4
- package/cli/selftune/init.ts +14 -9
- package/cli/selftune/localdb/queries.ts +57 -0
- package/cli/selftune/monitoring/watch.ts +39 -38
- package/cli/selftune/normalization.ts +2 -23
- package/cli/selftune/orchestrate.ts +224 -24
- package/cli/selftune/routes/skill-report.ts +17 -0
- package/cli/selftune/schedule.ts +74 -14
- package/cli/selftune/sync.ts +7 -3
- package/cli/selftune/types.ts +44 -10
- package/cli/selftune/utils/cli-error.ts +102 -0
- package/cli/selftune/utils/jsonl.ts +2 -0
- package/cli/selftune/workflows/workflows.ts +23 -17
- package/package.json +3 -1
- package/packages/ui/src/components/RecentActivityFeed.tsx +86 -0
- package/packages/ui/src/components/index.ts +1 -0
- package/packages/ui/src/components/section-cards.tsx +13 -0
- package/skill/SKILL.md +1 -1
- package/skill/Workflows/Evolve.md +4 -0
- package/skill/Workflows/Initialize.md +8 -8
- package/skill/Workflows/Orchestrate.md +11 -7
- package/skill/Workflows/Schedule.md +11 -0
- package/skill/references/logs.md +22 -21
- package/skill/settings_snippet.json +29 -6
- package/apps/local-dashboard/dist/assets/index-4_dAY17K.js +0 -16
- package/apps/local-dashboard/dist/assets/index-BxV5WZHc.css +0 -2
- package/apps/local-dashboard/dist/assets/vendor-ui-7xD7fNEU.js +0 -12
|
@@ -36,10 +36,12 @@ import type {
|
|
|
36
36
|
SessionTelemetryRecord,
|
|
37
37
|
SkillUsageRecord,
|
|
38
38
|
} from "../types.js";
|
|
39
|
+
import { CLIError, handleCLIError } from "../utils/cli-error.js";
|
|
39
40
|
import { parseFrontmatter, replaceDescription } from "../utils/frontmatter.js";
|
|
40
41
|
import { createEvolveTUI } from "../utils/tui.js";
|
|
41
42
|
import { appendAuditEntry } from "./audit.js";
|
|
42
43
|
import { checkConstitution } from "./constitutional.js";
|
|
44
|
+
import { scoreDescription } from "./description-quality.js";
|
|
43
45
|
import { appendEvidenceEntry } from "./evidence.js";
|
|
44
46
|
import { extractFailurePatterns } from "./extract-patterns.js";
|
|
45
47
|
import {
|
|
@@ -94,6 +96,8 @@ export interface EvolveResult {
|
|
|
94
96
|
baselineResult?: BaselineMeasurement;
|
|
95
97
|
gateValidation?: ValidationResult;
|
|
96
98
|
sync_result?: SyncResult;
|
|
99
|
+
descriptionQualityBefore?: number;
|
|
100
|
+
descriptionQualityAfter?: number;
|
|
97
101
|
}
|
|
98
102
|
|
|
99
103
|
/**
|
|
@@ -247,16 +251,26 @@ export async function evolve(
|
|
|
247
251
|
);
|
|
248
252
|
|
|
249
253
|
/** Stamp every return with pipeline stats so callers always get them. */
|
|
250
|
-
const withStats = (r: Omit<EvolveResult, "llmCallCount" | "elapsedMs">): EvolveResult =>
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
254
|
+
const withStats = (r: Omit<EvolveResult, "llmCallCount" | "elapsedMs">): EvolveResult => {
|
|
255
|
+
const descQualityAfterScore = r.proposal
|
|
256
|
+
? scoreDescription(r.proposal.proposed_description, options.skillName).composite
|
|
257
|
+
: undefined;
|
|
258
|
+
return {
|
|
259
|
+
...r,
|
|
260
|
+
llmCallCount,
|
|
261
|
+
elapsedMs: Date.now() - pipelineStart,
|
|
262
|
+
...(syncResult ? { sync_result: syncResult } : {}),
|
|
263
|
+
...(descQualityBeforeScore != null
|
|
264
|
+
? { descriptionQualityBefore: descQualityBeforeScore }
|
|
265
|
+
: {}),
|
|
266
|
+
...(descQualityAfterScore != null ? { descriptionQualityAfter: descQualityAfterScore } : {}),
|
|
267
|
+
};
|
|
268
|
+
};
|
|
256
269
|
|
|
257
|
-
// Hoisted so catch block can preserve partial results on error
|
|
270
|
+
// Hoisted so catch block and withStats can preserve partial results on error
|
|
258
271
|
let lastProposal: EvolutionProposal | null = null;
|
|
259
272
|
let lastValidation: ValidationResult | null = null;
|
|
273
|
+
let descQualityBeforeScore: number | undefined;
|
|
260
274
|
|
|
261
275
|
try {
|
|
262
276
|
// -----------------------------------------------------------------------
|
|
@@ -281,7 +295,11 @@ export async function evolve(
|
|
|
281
295
|
const versionTag = skillVersion ? `, v${skillVersion}` : "";
|
|
282
296
|
const createdAuditDetails = (message: string) =>
|
|
283
297
|
`original_description:${rawContent}\n${message}`;
|
|
284
|
-
|
|
298
|
+
const descQualityBefore = scoreDescription(currentDescription, skillName);
|
|
299
|
+
descQualityBeforeScore = descQualityBefore.composite;
|
|
300
|
+
tui.done(
|
|
301
|
+
`Loaded SKILL.md (desc: ${currentDescription.length} chars${versionTag}, quality: ${descQualityBefore.composite})`,
|
|
302
|
+
);
|
|
285
303
|
|
|
286
304
|
if (options.syncFirst) {
|
|
287
305
|
tui.step(`Syncing source-truth telemetry${options.syncForce ? " (force)" : ""}...`);
|
|
@@ -1111,38 +1129,36 @@ Options:
|
|
|
1111
1129
|
}
|
|
1112
1130
|
|
|
1113
1131
|
if (!values.skill || !values["skill-path"]) {
|
|
1114
|
-
|
|
1115
|
-
|
|
1132
|
+
throw new CLIError(
|
|
1133
|
+
"--skill and --skill-path are required",
|
|
1134
|
+
"MISSING_FLAG",
|
|
1135
|
+
"selftune evolve --skill <name> --skill-path <path>",
|
|
1136
|
+
);
|
|
1116
1137
|
}
|
|
1117
1138
|
if ((values["sync-force"] ?? false) && !(values["sync-first"] ?? false)) {
|
|
1118
|
-
|
|
1119
|
-
|
|
1139
|
+
throw new CLIError(
|
|
1140
|
+
"--sync-force requires --sync-first",
|
|
1141
|
+
"INVALID_FLAG",
|
|
1142
|
+
"Add --sync-first when using --sync-force",
|
|
1143
|
+
);
|
|
1120
1144
|
}
|
|
1121
1145
|
|
|
1122
1146
|
const { detectAgent } = await import("../utils/llm-call.js");
|
|
1123
1147
|
const requestedAgent = values.agent;
|
|
1124
1148
|
if (requestedAgent && !Bun.which(requestedAgent)) {
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
message: `Agent CLI '${requestedAgent}' not found in PATH.`,
|
|
1130
|
-
action: "Install it or omit --agent to use auto-detection.",
|
|
1131
|
-
}),
|
|
1149
|
+
throw new CLIError(
|
|
1150
|
+
`Agent CLI '${requestedAgent}' not found in PATH.`,
|
|
1151
|
+
"AGENT_NOT_FOUND",
|
|
1152
|
+
"Install it or omit --agent to use auto-detection.",
|
|
1132
1153
|
);
|
|
1133
|
-
process.exit(1);
|
|
1134
1154
|
}
|
|
1135
1155
|
const agent = requestedAgent ?? detectAgent();
|
|
1136
1156
|
if (!agent) {
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
message: "No agent CLI (claude/codex/opencode) found in PATH.",
|
|
1142
|
-
action: "Install Claude Code, Codex, or OpenCode.",
|
|
1143
|
-
}),
|
|
1157
|
+
throw new CLIError(
|
|
1158
|
+
"No agent CLI (claude/codex/opencode) found in PATH.",
|
|
1159
|
+
"AGENT_NOT_FOUND",
|
|
1160
|
+
"Install Claude Code, Codex, or OpenCode.",
|
|
1144
1161
|
);
|
|
1145
|
-
process.exit(1);
|
|
1146
1162
|
}
|
|
1147
1163
|
|
|
1148
1164
|
// -------------------------------------------------------------------------
|
|
@@ -1150,20 +1166,27 @@ Options:
|
|
|
1150
1166
|
// -------------------------------------------------------------------------
|
|
1151
1167
|
const skillPath = values["skill-path"];
|
|
1152
1168
|
if (!skillPath) {
|
|
1153
|
-
|
|
1154
|
-
|
|
1169
|
+
throw new CLIError(
|
|
1170
|
+
"--skill-path is required.",
|
|
1171
|
+
"MISSING_FLAG",
|
|
1172
|
+
"selftune evolve --skill <name> --skill-path <path>",
|
|
1173
|
+
);
|
|
1155
1174
|
}
|
|
1156
1175
|
if (!existsSync(skillPath)) {
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1176
|
+
throw new CLIError(
|
|
1177
|
+
`SKILL.md not found at: ${skillPath}`,
|
|
1178
|
+
"FILE_NOT_FOUND",
|
|
1179
|
+
"Verify the --skill-path argument points to an existing SKILL.md file.",
|
|
1180
|
+
);
|
|
1160
1181
|
}
|
|
1161
1182
|
|
|
1162
1183
|
const evalSetPath = values["eval-set"];
|
|
1163
1184
|
if (evalSetPath && !existsSync(evalSetPath)) {
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1185
|
+
throw new CLIError(
|
|
1186
|
+
`Eval set file not found at: ${evalSetPath}`,
|
|
1187
|
+
"FILE_NOT_FOUND",
|
|
1188
|
+
"Verify the --eval-set argument points to an existing JSON file.",
|
|
1189
|
+
);
|
|
1167
1190
|
}
|
|
1168
1191
|
|
|
1169
1192
|
// If no eval-set provided, check that log files exist for auto-generation
|
|
@@ -1172,12 +1195,11 @@ Options:
|
|
|
1172
1195
|
const hasSkillLog = querySkillUsageRecords(dbCheck).length > 0;
|
|
1173
1196
|
const hasQueryLog = existsSync(QUERY_LOG);
|
|
1174
1197
|
if (!hasSkillLog && !hasQueryLog) {
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
"
|
|
1198
|
+
throw new CLIError(
|
|
1199
|
+
`No eval set provided and no telemetry logs found. Expected logs at: ${SKILL_LOG} and ${QUERY_LOG}`,
|
|
1200
|
+
"MISSING_DATA",
|
|
1201
|
+
"Either pass --eval-set <path> or generate logs first by using selftune-enabled skills.",
|
|
1178
1202
|
);
|
|
1179
|
-
console.error(` Expected logs at: ${SKILL_LOG} and ${QUERY_LOG}`);
|
|
1180
|
-
process.exit(1);
|
|
1181
1203
|
}
|
|
1182
1204
|
}
|
|
1183
1205
|
|
|
@@ -1244,6 +1266,12 @@ Options:
|
|
|
1244
1266
|
rationale: result.proposal?.rationale ?? "",
|
|
1245
1267
|
...(result.skillVersion ? { version: result.skillVersion } : {}),
|
|
1246
1268
|
dashboard_url: `http://localhost:3141/report/${encodeURIComponent(values.skill)}`,
|
|
1269
|
+
...(result.descriptionQualityBefore != null
|
|
1270
|
+
? { description_quality_before: result.descriptionQualityBefore }
|
|
1271
|
+
: {}),
|
|
1272
|
+
...(result.descriptionQualityAfter != null
|
|
1273
|
+
? { description_quality_after: result.descriptionQualityAfter }
|
|
1274
|
+
: {}),
|
|
1247
1275
|
};
|
|
1248
1276
|
console.log(JSON.stringify(summary, null, 2));
|
|
1249
1277
|
}
|
|
@@ -1276,20 +1304,5 @@ Options:
|
|
|
1276
1304
|
}
|
|
1277
1305
|
|
|
1278
1306
|
if (import.meta.main) {
|
|
1279
|
-
cliMain().catch(
|
|
1280
|
-
const message = err instanceof Error ? err.message : String(err);
|
|
1281
|
-
const stack = err instanceof Error ? err.stack : undefined;
|
|
1282
|
-
console.error(`[FATAL] ${message}`);
|
|
1283
|
-
if (stack && process.env.SELFTUNE_VERBOSE === "1") {
|
|
1284
|
-
console.error(stack);
|
|
1285
|
-
}
|
|
1286
|
-
console.error(
|
|
1287
|
-
"\nTroubleshooting:\n" +
|
|
1288
|
-
" - Verify --skill-path points to a valid SKILL.md file\n" +
|
|
1289
|
-
" - Ensure eval data exists (run `selftune eval generate` first) or pass --eval-set\n" +
|
|
1290
|
-
" - Check that ANTHROPIC_API_KEY is set if using Claude\n" +
|
|
1291
|
-
" - Re-run with --verbose for full diagnostic output",
|
|
1292
|
-
);
|
|
1293
|
-
process.exit(1);
|
|
1294
|
-
});
|
|
1307
|
+
cliMain().catch(handleCLIError);
|
|
1295
1308
|
}
|
|
@@ -13,6 +13,7 @@ import { parseArgs } from "node:util";
|
|
|
13
13
|
|
|
14
14
|
import { updateContextAfterRollback } from "../memory/writer.js";
|
|
15
15
|
import type { EvolutionAuditEntry } from "../types.js";
|
|
16
|
+
import { CLIError, handleCLIError } from "../utils/cli-error.js";
|
|
16
17
|
import { replaceDescription } from "../utils/frontmatter.js";
|
|
17
18
|
import { appendAuditEntry, getLastDeployedProposal, readAuditTrail } from "./audit.js";
|
|
18
19
|
|
|
@@ -233,8 +234,11 @@ Options:
|
|
|
233
234
|
}
|
|
234
235
|
|
|
235
236
|
if (!values.skill || !values["skill-path"]) {
|
|
236
|
-
|
|
237
|
-
|
|
237
|
+
throw new CLIError(
|
|
238
|
+
"--skill and --skill-path are required",
|
|
239
|
+
"MISSING_FLAG",
|
|
240
|
+
"selftune evolve rollback --skill <name> --skill-path <path>",
|
|
241
|
+
);
|
|
238
242
|
}
|
|
239
243
|
|
|
240
244
|
const result = await rollback({
|
|
@@ -248,8 +252,5 @@ Options:
|
|
|
248
252
|
}
|
|
249
253
|
|
|
250
254
|
if (import.meta.main) {
|
|
251
|
-
cliMain().catch(
|
|
252
|
-
console.error(`[FATAL] ${err}`);
|
|
253
|
-
process.exit(1);
|
|
254
|
-
});
|
|
255
|
+
cliMain().catch(handleCLIError);
|
|
255
256
|
}
|
|
@@ -17,7 +17,7 @@ import { AGENT_CANDIDATES, TELEMETRY_LOG } from "../constants.js";
|
|
|
17
17
|
import { getDb } from "../localdb/db.js";
|
|
18
18
|
import { querySessionTelemetry, querySkillUsageRecords } from "../localdb/queries.js";
|
|
19
19
|
import type { GradingResult, SessionTelemetryRecord, SkillUsageRecord } from "../types.js";
|
|
20
|
-
import {
|
|
20
|
+
import { CLIError, handleCLIError } from "../utils/cli-error.js";
|
|
21
21
|
import { detectAgent as _detectAgent } from "../utils/llm-call.js";
|
|
22
22
|
import { readExcerpt } from "../utils/transcript.js";
|
|
23
23
|
import {
|
|
@@ -63,8 +63,7 @@ Options:
|
|
|
63
63
|
|
|
64
64
|
const skill = values.skill;
|
|
65
65
|
if (!skill) {
|
|
66
|
-
|
|
67
|
-
process.exit(1);
|
|
66
|
+
throw new CLIError("--skill is required", "MISSING_FLAG", "selftune auto-grade --skill <name>");
|
|
68
67
|
}
|
|
69
68
|
|
|
70
69
|
// --- Determine agent ---
|
|
@@ -72,10 +71,11 @@ Options:
|
|
|
72
71
|
const validAgents = [...AGENT_CANDIDATES];
|
|
73
72
|
if (values.agent) {
|
|
74
73
|
if (!validAgents.includes(values.agent)) {
|
|
75
|
-
|
|
76
|
-
`
|
|
74
|
+
throw new CLIError(
|
|
75
|
+
`Invalid --agent '${values.agent}'. Expected one of: ${validAgents.join(", ")}`,
|
|
76
|
+
"INVALID_FLAG",
|
|
77
|
+
`selftune auto-grade --skill <name> --agent ${validAgents[0]}`,
|
|
77
78
|
);
|
|
78
|
-
process.exit(1);
|
|
79
79
|
}
|
|
80
80
|
agent = values.agent;
|
|
81
81
|
} else {
|
|
@@ -83,28 +83,19 @@ Options:
|
|
|
83
83
|
}
|
|
84
84
|
|
|
85
85
|
if (!agent) {
|
|
86
|
-
|
|
87
|
-
`
|
|
88
|
-
|
|
86
|
+
throw new CLIError(
|
|
87
|
+
`No supported agent CLI (${AGENT_CANDIDATES.join("/")}) found in PATH`,
|
|
88
|
+
"AGENT_NOT_FOUND",
|
|
89
|
+
"Install one of the supported agent CLIs",
|
|
89
90
|
);
|
|
90
|
-
process.exit(1);
|
|
91
91
|
}
|
|
92
92
|
|
|
93
93
|
console.error(`[INFO] Auto-grade via agent: ${agent}`);
|
|
94
94
|
|
|
95
95
|
// --- Auto-find session ---
|
|
96
|
-
const
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
if (telemetryLog === TELEMETRY_LOG) {
|
|
100
|
-
const db = getDb();
|
|
101
|
-
telRecords = querySessionTelemetry(db) as SessionTelemetryRecord[];
|
|
102
|
-
skillUsageRecords = querySkillUsageRecords(db) as SkillUsageRecord[];
|
|
103
|
-
} else {
|
|
104
|
-
// Intentional JSONL fallback: custom --telemetry-log path overrides SQLite reads
|
|
105
|
-
telRecords = readJsonl<SessionTelemetryRecord>(telemetryLog);
|
|
106
|
-
skillUsageRecords = [];
|
|
107
|
-
}
|
|
96
|
+
const db = getDb();
|
|
97
|
+
const telRecords = querySessionTelemetry(db) as SessionTelemetryRecord[];
|
|
98
|
+
const skillUsageRecords = querySkillUsageRecords(db) as SkillUsageRecord[];
|
|
108
99
|
|
|
109
100
|
let telemetry: SessionTelemetryRecord;
|
|
110
101
|
let sessionId: string;
|
|
@@ -114,21 +105,22 @@ Options:
|
|
|
114
105
|
sessionId = values["session-id"];
|
|
115
106
|
const resolved = resolveSessionById(telRecords, sessionId);
|
|
116
107
|
if (!resolved) {
|
|
117
|
-
|
|
118
|
-
`
|
|
119
|
-
|
|
108
|
+
throw new CLIError(
|
|
109
|
+
`Session '${sessionId}' not found in telemetry or recoverable transcript data`,
|
|
110
|
+
"MISSING_DATA",
|
|
111
|
+
"Check the session ID or omit --session-id to auto-select the latest matching session",
|
|
120
112
|
);
|
|
121
|
-
process.exit(1);
|
|
122
113
|
}
|
|
123
114
|
telemetry = resolved.telemetry;
|
|
124
115
|
transcriptPath = resolved.transcriptPath;
|
|
125
116
|
} else {
|
|
126
117
|
const resolved = resolveLatestSessionForSkill(telRecords, skillUsageRecords, skill);
|
|
127
118
|
if (!resolved) {
|
|
128
|
-
|
|
129
|
-
`
|
|
119
|
+
throw new CLIError(
|
|
120
|
+
`No session found for skill '${skill}'`,
|
|
121
|
+
"MISSING_DATA",
|
|
122
|
+
"Run the skill first, or pass --session-id",
|
|
130
123
|
);
|
|
131
|
-
process.exit(1);
|
|
132
124
|
}
|
|
133
125
|
telemetry = resolved.telemetry;
|
|
134
126
|
sessionId = resolved.sessionId ?? "unknown";
|
|
@@ -169,8 +161,11 @@ Options:
|
|
|
169
161
|
agent,
|
|
170
162
|
});
|
|
171
163
|
} catch (err) {
|
|
172
|
-
|
|
173
|
-
|
|
164
|
+
throw new CLIError(
|
|
165
|
+
`Grading failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
166
|
+
"OPERATION_FAILED",
|
|
167
|
+
"Check agent availability and try again",
|
|
168
|
+
);
|
|
174
169
|
}
|
|
175
170
|
|
|
176
171
|
const outputPath = values.output ?? buildDefaultGradingOutputPath(sessionId);
|
|
@@ -203,8 +198,5 @@ Options:
|
|
|
203
198
|
|
|
204
199
|
// Guard: only run when invoked directly
|
|
205
200
|
if (import.meta.main) {
|
|
206
|
-
cliMain().catch(
|
|
207
|
-
console.error(`[FATAL] ${err}`);
|
|
208
|
-
process.exit(1);
|
|
209
|
-
});
|
|
201
|
+
cliMain().catch(handleCLIError);
|
|
210
202
|
}
|
|
@@ -28,7 +28,7 @@ import type {
|
|
|
28
28
|
SessionTelemetryRecord,
|
|
29
29
|
SkillUsageRecord,
|
|
30
30
|
} from "../types.js";
|
|
31
|
-
import {
|
|
31
|
+
import { CLIError, handleCLIError } from "../utils/cli-error.js";
|
|
32
32
|
import {
|
|
33
33
|
detectAgent as _detectAgent,
|
|
34
34
|
stripMarkdownFences as _stripMarkdownFences,
|
|
@@ -744,8 +744,7 @@ Options:
|
|
|
744
744
|
|
|
745
745
|
const skill = values.skill;
|
|
746
746
|
if (!skill) {
|
|
747
|
-
|
|
748
|
-
process.exit(1);
|
|
747
|
+
throw new CLIError("--skill is required", "MISSING_FLAG", "selftune grade --skill <name>");
|
|
749
748
|
}
|
|
750
749
|
|
|
751
750
|
// --- Determine agent ---
|
|
@@ -753,10 +752,11 @@ Options:
|
|
|
753
752
|
const validAgents = [...AGENT_CANDIDATES];
|
|
754
753
|
if (values.agent) {
|
|
755
754
|
if (!validAgents.includes(values.agent)) {
|
|
756
|
-
|
|
757
|
-
`
|
|
755
|
+
throw new CLIError(
|
|
756
|
+
`Invalid --agent '${values.agent}'. Expected one of: ${validAgents.join(", ")}`,
|
|
757
|
+
"INVALID_FLAG",
|
|
758
|
+
`selftune grade --skill <name> --agent ${validAgents[0]}`,
|
|
758
759
|
);
|
|
759
|
-
process.exit(1);
|
|
760
760
|
}
|
|
761
761
|
agent = values.agent;
|
|
762
762
|
} else {
|
|
@@ -764,11 +764,11 @@ Options:
|
|
|
764
764
|
}
|
|
765
765
|
|
|
766
766
|
if (!agent) {
|
|
767
|
-
|
|
768
|
-
`
|
|
769
|
-
|
|
767
|
+
throw new CLIError(
|
|
768
|
+
`No supported agent CLI (${AGENT_CANDIDATES.join("/")}) found in PATH`,
|
|
769
|
+
"AGENT_NOT_FOUND",
|
|
770
|
+
"Install claude, codex, or opencode CLI, then retry",
|
|
770
771
|
);
|
|
771
|
-
process.exit(1);
|
|
772
772
|
}
|
|
773
773
|
|
|
774
774
|
console.error(`[INFO] Grading via agent: ${agent}`);
|
|
@@ -778,8 +778,11 @@ Options:
|
|
|
778
778
|
if (values["evals-json"] && values["eval-id"] != null) {
|
|
779
779
|
const evalIdNum = Number(values["eval-id"]);
|
|
780
780
|
if (!Number.isFinite(evalIdNum) || !Number.isInteger(evalIdNum)) {
|
|
781
|
-
|
|
782
|
-
|
|
781
|
+
throw new CLIError(
|
|
782
|
+
`--eval-id must be a finite integer, got: ${values["eval-id"]}`,
|
|
783
|
+
"INVALID_FLAG",
|
|
784
|
+
"selftune grade --eval-id <integer>",
|
|
785
|
+
);
|
|
783
786
|
}
|
|
784
787
|
expectations = loadExpectationsFromEvalsJson(values["evals-json"], evalIdNum);
|
|
785
788
|
} else if (values.expectations?.length) {
|
|
@@ -804,18 +807,9 @@ Options:
|
|
|
804
807
|
let transcriptPath = "";
|
|
805
808
|
let sessionId = "unknown";
|
|
806
809
|
|
|
807
|
-
const
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
if (telemetryLog === TELEMETRY_LOG) {
|
|
811
|
-
const db = getDb();
|
|
812
|
-
telRecords = querySessionTelemetry(db) as SessionTelemetryRecord[];
|
|
813
|
-
skillUsageRecords = querySkillUsageRecords(db) as SkillUsageRecord[];
|
|
814
|
-
} else {
|
|
815
|
-
// Intentional JSONL fallback: custom --telemetry-log path overrides SQLite reads
|
|
816
|
-
telRecords = readJsonl<SessionTelemetryRecord>(telemetryLog);
|
|
817
|
-
skillUsageRecords = [];
|
|
818
|
-
}
|
|
810
|
+
const db = getDb();
|
|
811
|
+
const telRecords = querySessionTelemetry(db) as SessionTelemetryRecord[];
|
|
812
|
+
const skillUsageRecords = querySkillUsageRecords(db) as SkillUsageRecord[];
|
|
819
813
|
|
|
820
814
|
if (values.transcript) {
|
|
821
815
|
transcriptPath = values.transcript;
|
|
@@ -873,8 +867,11 @@ Options:
|
|
|
873
867
|
agent,
|
|
874
868
|
});
|
|
875
869
|
} catch (err) {
|
|
876
|
-
|
|
877
|
-
|
|
870
|
+
throw new CLIError(
|
|
871
|
+
`Grading failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
872
|
+
"OPERATION_FAILED",
|
|
873
|
+
"Check agent availability and try again",
|
|
874
|
+
);
|
|
878
875
|
}
|
|
879
876
|
|
|
880
877
|
const outputPath = values.output ?? buildDefaultGradingOutputPath(sessionId);
|
|
@@ -898,8 +895,5 @@ Options:
|
|
|
898
895
|
|
|
899
896
|
// Guard: only run when invoked directly
|
|
900
897
|
if (import.meta.main) {
|
|
901
|
-
cliMain().catch(
|
|
902
|
-
console.error(`[FATAL] ${err}`);
|
|
903
|
-
process.exit(1);
|
|
904
|
-
});
|
|
898
|
+
cliMain().catch(handleCLIError);
|
|
905
899
|
}
|
|
@@ -179,9 +179,18 @@ if (import.meta.main) {
|
|
|
179
179
|
const statePath = sessionStatePath(sessionId);
|
|
180
180
|
const suggestions = evaluateRules(DEFAULT_RULES, ctx, statePath);
|
|
181
181
|
|
|
182
|
-
|
|
183
|
-
// Output
|
|
184
|
-
|
|
182
|
+
if (suggestions.length > 0) {
|
|
183
|
+
// Output as JSON with additionalContext — Claude Code adds this to
|
|
184
|
+
// Claude's context on UserPromptSubmit (more reliable than stderr)
|
|
185
|
+
const context = suggestions.map((s) => `[selftune] Suggestion: ${s}`).join("\n");
|
|
186
|
+
process.stdout.write(
|
|
187
|
+
JSON.stringify({
|
|
188
|
+
hookSpecificOutput: {
|
|
189
|
+
hookEventName: "UserPromptSubmit",
|
|
190
|
+
additionalContext: context,
|
|
191
|
+
},
|
|
192
|
+
}),
|
|
193
|
+
);
|
|
185
194
|
}
|
|
186
195
|
}
|
|
187
196
|
} catch {
|
|
@@ -19,7 +19,6 @@ import { basename, dirname, join } from "node:path";
|
|
|
19
19
|
|
|
20
20
|
import { EVOLUTION_AUDIT_LOG, SELFTUNE_CONFIG_DIR } from "../constants.js";
|
|
21
21
|
import type { PreToolUsePayload } from "../types.js";
|
|
22
|
-
import { readJsonl } from "../utils/jsonl.js";
|
|
23
22
|
|
|
24
23
|
// ---------------------------------------------------------------------------
|
|
25
24
|
// Detection helpers (same pattern as skill-change-guard)
|
|
@@ -35,40 +34,31 @@ function extractSkillName(filePath: string): string {
|
|
|
35
34
|
}
|
|
36
35
|
|
|
37
36
|
// ---------------------------------------------------------------------------
|
|
38
|
-
// Active monitoring check (SQLite
|
|
37
|
+
// Active monitoring check (always SQLite)
|
|
39
38
|
// ---------------------------------------------------------------------------
|
|
40
39
|
|
|
41
40
|
/**
|
|
42
41
|
* Check if a skill has an active deployed evolution (meaning it's under monitoring).
|
|
43
|
-
* SQLite is the default read path; JSONL is used only for test/custom-path overrides.
|
|
44
42
|
*
|
|
45
43
|
* A skill is "actively monitored" if its last audit action is "deployed".
|
|
46
44
|
* If the last action is "rolled_back", it's no longer monitored.
|
|
47
45
|
*/
|
|
48
46
|
export async function checkActiveMonitoring(
|
|
49
47
|
skillName: string,
|
|
50
|
-
|
|
48
|
+
_auditLogPath: string,
|
|
51
49
|
): Promise<boolean> {
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
entries = readJsonl<{ skill_name?: string; action: string }>(auditLogPath);
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
// Filter entries for this skill by skill_name field
|
|
68
|
-
const skillEntries = entries.filter((e) => e.skill_name === skillName);
|
|
69
|
-
if (skillEntries.length === 0) return false;
|
|
70
|
-
|
|
71
|
-
const lastEntry = skillEntries[skillEntries.length - 1];
|
|
50
|
+
const { getDb } = await import("../localdb/db.js");
|
|
51
|
+
const { queryEvolutionAudit } = await import("../localdb/queries.js");
|
|
52
|
+
const db = getDb();
|
|
53
|
+
const entries = queryEvolutionAudit(db, skillName) as Array<{
|
|
54
|
+
skill_name?: string;
|
|
55
|
+
action: string;
|
|
56
|
+
}>;
|
|
57
|
+
|
|
58
|
+
if (entries.length === 0) return false;
|
|
59
|
+
|
|
60
|
+
// queryEvolutionAudit returns DESC order, so [0] is the most recent entry
|
|
61
|
+
const lastEntry = entries[0];
|
|
72
62
|
return lastEntry.action === "deployed";
|
|
73
63
|
}
|
|
74
64
|
|
|
@@ -21,7 +21,6 @@ import {
|
|
|
21
21
|
reservePromptIdentity,
|
|
22
22
|
} from "../normalization.js";
|
|
23
23
|
import type { ImprovementSignalRecord, PromptSubmitPayload, QueryLogRecord } from "../types.js";
|
|
24
|
-
import { appendJsonl } from "../utils/jsonl.js";
|
|
25
24
|
|
|
26
25
|
// ---------------------------------------------------------------------------
|
|
27
26
|
// Installed skill name cache
|
|
@@ -155,7 +154,13 @@ export async function processPrompt(
|
|
|
155
154
|
promptStatePath?: string,
|
|
156
155
|
_signalLogPath?: string,
|
|
157
156
|
): Promise<QueryLogRecord | null> {
|
|
158
|
-
const
|
|
157
|
+
const rawPrompt =
|
|
158
|
+
typeof payload.prompt === "string"
|
|
159
|
+
? payload.prompt
|
|
160
|
+
: typeof payload.user_prompt === "string"
|
|
161
|
+
? payload.user_prompt
|
|
162
|
+
: "";
|
|
163
|
+
const query = rawPrompt.trim();
|
|
159
164
|
|
|
160
165
|
if (!query) return null;
|
|
161
166
|
|
|
@@ -179,13 +184,6 @@ export async function processPrompt(
|
|
|
179
184
|
/* hooks must never block */
|
|
180
185
|
}
|
|
181
186
|
|
|
182
|
-
// JSONL backup (best-effort, hooks must never block)
|
|
183
|
-
try {
|
|
184
|
-
appendJsonl(logPath, record);
|
|
185
|
-
} catch {
|
|
186
|
-
/* hooks must never block */
|
|
187
|
-
}
|
|
188
|
-
|
|
189
187
|
// Emit canonical prompt record (additive)
|
|
190
188
|
const baseInput: CanonicalBaseInput = {
|
|
191
189
|
platform: "claude_code",
|
|
@@ -20,7 +20,6 @@ import {
|
|
|
20
20
|
getLatestPromptIdentity,
|
|
21
21
|
} from "../normalization.js";
|
|
22
22
|
import type { SessionTelemetryRecord, StopPayload } from "../types.js";
|
|
23
|
-
import { appendJsonl } from "../utils/jsonl.js";
|
|
24
23
|
import { parseTranscript } from "../utils/transcript.js";
|
|
25
24
|
|
|
26
25
|
const LOCK_STALE_MS = 30 * 60 * 1000;
|
|
@@ -120,13 +119,6 @@ export async function processSessionStop(
|
|
|
120
119
|
/* hooks must never block */
|
|
121
120
|
}
|
|
122
121
|
|
|
123
|
-
// JSONL backup (append-only, fail-open)
|
|
124
|
-
try {
|
|
125
|
-
appendJsonl(logPath, record);
|
|
126
|
-
} catch {
|
|
127
|
-
/* JSONL is a backup — never block on failure */
|
|
128
|
-
}
|
|
129
|
-
|
|
130
122
|
// Emit canonical session + execution fact records (additive)
|
|
131
123
|
const baseInput: CanonicalBaseInput = {
|
|
132
124
|
platform: "claude_code",
|