selftune 0.2.18 → 0.2.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -4
- package/apps/local-dashboard/dist/assets/index-D8O-RG1I.js +60 -0
- package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-table-BIiI3YhS.js +1 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +12 -0
- package/apps/local-dashboard/dist/index.html +5 -5
- package/cli/selftune/alpha-upload/stage-canonical.ts +7 -6
- package/cli/selftune/constants.ts +10 -0
- package/cli/selftune/contribute/contribute.ts +30 -2
- package/cli/selftune/contribution-config.ts +249 -0
- package/cli/selftune/contribution-relay.ts +177 -0
- package/cli/selftune/contribution-signals.ts +219 -0
- package/cli/selftune/contribution-staging.ts +147 -0
- package/cli/selftune/contributions.ts +532 -0
- package/cli/selftune/creator-contributions.ts +333 -0
- package/cli/selftune/dashboard-contract.ts +209 -1
- package/cli/selftune/dashboard-server.ts +45 -11
- package/cli/selftune/eval/family-overlap.ts +714 -0
- package/cli/selftune/eval/hooks-to-evals.ts +182 -28
- package/cli/selftune/eval/synthetic-evals.ts +298 -11
- package/cli/selftune/evolution/evidence.ts +5 -0
- package/cli/selftune/evolution/evolve-body.ts +62 -2
- package/cli/selftune/evolution/evolve.ts +58 -1
- package/cli/selftune/evolution/validate-body.ts +10 -0
- package/cli/selftune/evolution/validate-host-replay.ts +236 -0
- package/cli/selftune/evolution/validate-proposal.ts +10 -0
- package/cli/selftune/evolution/validate-routing.ts +112 -5
- package/cli/selftune/export.ts +2 -2
- package/cli/selftune/index.ts +41 -5
- package/cli/selftune/ingestors/codex-rollout.ts +31 -35
- package/cli/selftune/ingestors/codex-wrapper.ts +32 -24
- package/cli/selftune/localdb/db.ts +2 -2
- package/cli/selftune/localdb/direct-write.ts +8 -3
- package/cli/selftune/localdb/materialize.ts +7 -2
- package/cli/selftune/localdb/queries.ts +712 -31
- package/cli/selftune/localdb/schema.ts +30 -1
- package/cli/selftune/recover.ts +153 -0
- package/cli/selftune/repair/skill-usage.ts +363 -4
- package/cli/selftune/routes/actions.ts +35 -1
- package/cli/selftune/routes/analytics.ts +14 -0
- package/cli/selftune/routes/index.ts +1 -0
- package/cli/selftune/routes/overview.ts +112 -4
- package/cli/selftune/routes/skill-report.ts +575 -11
- package/cli/selftune/status.ts +81 -2
- package/cli/selftune/sync.ts +56 -2
- package/cli/selftune/trust-model.ts +66 -0
- package/cli/selftune/types.ts +103 -0
- package/cli/selftune/utils/skill-detection.ts +43 -0
- package/cli/selftune/utils/text-similarity.ts +73 -0
- package/cli/selftune/watchlist.ts +65 -0
- package/package.json +1 -1
- package/packages/ui/src/components/ActivityTimeline.tsx +165 -150
- package/packages/ui/src/components/EvidenceViewer.tsx +419 -145
- package/packages/ui/src/components/EvolutionTimeline.tsx +81 -29
- package/packages/ui/src/components/OrchestrateRunsPanel.tsx +33 -16
- package/packages/ui/src/components/RecentActivityFeed.tsx +72 -41
- package/packages/ui/src/components/section-cards.tsx +12 -9
- package/packages/ui/src/primitives/card.tsx +1 -1
- package/packages/ui/src/types.ts +4 -0
- package/skill/SKILL.md +11 -1
- package/skill/Workflows/AlphaUpload.md +4 -0
- package/skill/Workflows/Composability.md +78 -0
- package/skill/Workflows/Contribute.md +6 -3
- package/skill/Workflows/Contributions.md +97 -0
- package/skill/Workflows/CreatorContributions.md +74 -0
- package/skill/Workflows/Dashboard.md +31 -0
- package/skill/Workflows/Evals.md +57 -8
- package/skill/Workflows/Evolve.md +23 -0
- package/skill/Workflows/Ingest.md +7 -0
- package/skill/Workflows/Initialize.md +20 -1
- package/skill/Workflows/Recover.md +84 -0
- package/skill/Workflows/RepairSkillUsage.md +12 -4
- package/skill/Workflows/Sync.md +18 -12
- package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +0 -2
- package/apps/local-dashboard/dist/assets/index-DOu3iLD9.js +0 -16
- package/apps/local-dashboard/dist/assets/vendor-table-pHbDxq36.js +0 -8
- package/apps/local-dashboard/dist/assets/vendor-ui-DIwlrGlb.js +0 -12
|
@@ -5,9 +5,43 @@
|
|
|
5
5
|
* and running trigger accuracy checks against an eval set.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
import type {
|
|
8
|
+
import type {
|
|
9
|
+
BodyEvolutionProposal,
|
|
10
|
+
BodyValidationResult,
|
|
11
|
+
EvalEntry,
|
|
12
|
+
RoutingReplayEntryResult,
|
|
13
|
+
RoutingReplayFixture,
|
|
14
|
+
ValidationMode,
|
|
15
|
+
} from "../types.js";
|
|
9
16
|
import { callLlm } from "../utils/llm-call.js";
|
|
10
17
|
import { buildTriggerCheckPrompt, parseTriggerResponse } from "../utils/trigger-check.js";
|
|
18
|
+
import { runHostReplayFixture } from "./validate-host-replay.js";
|
|
19
|
+
|
|
20
|
+
export interface RoutingReplayRunnerInput {
|
|
21
|
+
routing: string;
|
|
22
|
+
evalSet: EvalEntry[];
|
|
23
|
+
agent: string;
|
|
24
|
+
fixture: RoutingReplayFixture;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
export type RoutingReplayRunner = (
|
|
28
|
+
input: RoutingReplayRunnerInput,
|
|
29
|
+
) => Promise<RoutingReplayEntryResult[]>;
|
|
30
|
+
|
|
31
|
+
export interface RoutingValidationOptions {
|
|
32
|
+
replayFixture?: RoutingReplayFixture;
|
|
33
|
+
replayRunner?: RoutingReplayRunner;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
export interface RoutingTriggerAccuracyResult {
|
|
37
|
+
before_pass_rate: number;
|
|
38
|
+
after_pass_rate: number;
|
|
39
|
+
improved: boolean;
|
|
40
|
+
validation_mode: ValidationMode;
|
|
41
|
+
validation_agent: string;
|
|
42
|
+
validation_fixture_id?: string;
|
|
43
|
+
per_entry_results?: RoutingReplayEntryResult[];
|
|
44
|
+
}
|
|
11
45
|
|
|
12
46
|
// ---------------------------------------------------------------------------
|
|
13
47
|
// Structural validation
|
|
@@ -77,9 +111,70 @@ export async function validateRoutingTriggerAccuracy(
|
|
|
77
111
|
evalSet: EvalEntry[],
|
|
78
112
|
agent: string,
|
|
79
113
|
modelFlag?: string,
|
|
80
|
-
|
|
114
|
+
options: RoutingValidationOptions = {},
|
|
115
|
+
): Promise<RoutingTriggerAccuracyResult> {
|
|
81
116
|
if (evalSet.length === 0) {
|
|
82
|
-
return {
|
|
117
|
+
return {
|
|
118
|
+
before_pass_rate: 0,
|
|
119
|
+
after_pass_rate: 0,
|
|
120
|
+
improved: false,
|
|
121
|
+
validation_mode: "structural_guard",
|
|
122
|
+
validation_agent: agent,
|
|
123
|
+
};
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
if (options.replayFixture && options.replayRunner) {
|
|
127
|
+
const beforeResults = await options.replayRunner({
|
|
128
|
+
routing: originalRouting,
|
|
129
|
+
evalSet,
|
|
130
|
+
agent,
|
|
131
|
+
fixture: options.replayFixture,
|
|
132
|
+
});
|
|
133
|
+
const afterResults = await options.replayRunner({
|
|
134
|
+
routing: proposedRouting,
|
|
135
|
+
evalSet,
|
|
136
|
+
agent,
|
|
137
|
+
fixture: options.replayFixture,
|
|
138
|
+
});
|
|
139
|
+
const beforePassed = beforeResults.filter((result) => result.passed).length;
|
|
140
|
+
const afterPassed = afterResults.filter((result) => result.passed).length;
|
|
141
|
+
const total = evalSet.length;
|
|
142
|
+
|
|
143
|
+
return {
|
|
144
|
+
before_pass_rate: beforePassed / total,
|
|
145
|
+
after_pass_rate: afterPassed / total,
|
|
146
|
+
improved: afterPassed > beforePassed,
|
|
147
|
+
validation_mode: "host_replay",
|
|
148
|
+
validation_agent: agent,
|
|
149
|
+
validation_fixture_id: options.replayFixture.fixture_id,
|
|
150
|
+
per_entry_results: afterResults,
|
|
151
|
+
};
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
if (options.replayFixture) {
|
|
155
|
+
const beforeResults = runHostReplayFixture({
|
|
156
|
+
routing: originalRouting,
|
|
157
|
+
evalSet,
|
|
158
|
+
fixture: options.replayFixture,
|
|
159
|
+
});
|
|
160
|
+
const afterResults = runHostReplayFixture({
|
|
161
|
+
routing: proposedRouting,
|
|
162
|
+
evalSet,
|
|
163
|
+
fixture: options.replayFixture,
|
|
164
|
+
});
|
|
165
|
+
const beforePassed = beforeResults.filter((result) => result.passed).length;
|
|
166
|
+
const afterPassed = afterResults.filter((result) => result.passed).length;
|
|
167
|
+
const total = evalSet.length;
|
|
168
|
+
|
|
169
|
+
return {
|
|
170
|
+
before_pass_rate: beforePassed / total,
|
|
171
|
+
after_pass_rate: afterPassed / total,
|
|
172
|
+
improved: afterPassed > beforePassed,
|
|
173
|
+
validation_mode: "host_replay",
|
|
174
|
+
validation_agent: agent,
|
|
175
|
+
validation_fixture_id: options.replayFixture.fixture_id,
|
|
176
|
+
per_entry_results: afterResults,
|
|
177
|
+
};
|
|
83
178
|
}
|
|
84
179
|
|
|
85
180
|
const systemPrompt = "You are an evaluation assistant. Answer only YES or NO.";
|
|
@@ -113,6 +208,8 @@ export async function validateRoutingTriggerAccuracy(
|
|
|
113
208
|
before_pass_rate: beforePassRate,
|
|
114
209
|
after_pass_rate: afterPassRate,
|
|
115
210
|
improved: afterPassRate > beforePassRate,
|
|
211
|
+
validation_mode: "llm_judge",
|
|
212
|
+
validation_agent: agent,
|
|
116
213
|
};
|
|
117
214
|
}
|
|
118
215
|
|
|
@@ -126,6 +223,7 @@ export async function validateRoutingProposal(
|
|
|
126
223
|
evalSet: EvalEntry[],
|
|
127
224
|
agent: string,
|
|
128
225
|
modelFlag?: string,
|
|
226
|
+
options: RoutingValidationOptions = {},
|
|
129
227
|
): Promise<BodyValidationResult> {
|
|
130
228
|
const gateResults: Array<{ gate: string; passed: boolean; reason: string }> = [];
|
|
131
229
|
|
|
@@ -145,6 +243,8 @@ export async function validateRoutingProposal(
|
|
|
145
243
|
gate_results: gateResults,
|
|
146
244
|
improved: false,
|
|
147
245
|
regressions: [],
|
|
246
|
+
validation_mode: "structural_guard",
|
|
247
|
+
validation_agent: agent,
|
|
148
248
|
};
|
|
149
249
|
}
|
|
150
250
|
|
|
@@ -155,13 +255,14 @@ export async function validateRoutingProposal(
|
|
|
155
255
|
evalSet,
|
|
156
256
|
agent,
|
|
157
257
|
modelFlag,
|
|
258
|
+
options,
|
|
158
259
|
);
|
|
159
260
|
gateResults.push({
|
|
160
261
|
gate: "trigger_accuracy",
|
|
161
262
|
passed: accuracy.improved,
|
|
162
263
|
reason: accuracy.improved
|
|
163
|
-
? `Improved: ${(accuracy.before_pass_rate * 100).toFixed(1)}% -> ${(accuracy.after_pass_rate * 100).toFixed(1)}%`
|
|
164
|
-
: `Not improved: ${(accuracy.before_pass_rate * 100).toFixed(1)}% -> ${(accuracy.after_pass_rate * 100).toFixed(1)}%`,
|
|
264
|
+
? `Improved via ${accuracy.validation_mode}: ${(accuracy.before_pass_rate * 100).toFixed(1)}% -> ${(accuracy.after_pass_rate * 100).toFixed(1)}%`
|
|
265
|
+
: `Not improved via ${accuracy.validation_mode}: ${(accuracy.before_pass_rate * 100).toFixed(1)}% -> ${(accuracy.after_pass_rate * 100).toFixed(1)}%`,
|
|
165
266
|
});
|
|
166
267
|
|
|
167
268
|
const gatesPassed = gateResults.filter((g) => g.passed).length;
|
|
@@ -173,5 +274,11 @@ export async function validateRoutingProposal(
|
|
|
173
274
|
gate_results: gateResults,
|
|
174
275
|
improved: gatesPassed === 2,
|
|
175
276
|
regressions: [],
|
|
277
|
+
validation_mode: accuracy.validation_mode,
|
|
278
|
+
validation_agent: accuracy.validation_agent,
|
|
279
|
+
validation_fixture_id: accuracy.validation_fixture_id,
|
|
280
|
+
before_pass_rate: accuracy.before_pass_rate,
|
|
281
|
+
after_pass_rate: accuracy.after_pass_rate,
|
|
282
|
+
per_entry_results: accuracy.per_entry_results,
|
|
176
283
|
};
|
|
177
284
|
}
|
package/cli/selftune/export.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Export SQLite data to JSONL format.
|
|
3
|
-
*
|
|
4
|
-
*
|
|
3
|
+
* Use this only when you explicitly need portable/debuggable JSONL snapshots
|
|
4
|
+
* for recovery, the contribute workflow, or external tools.
|
|
5
5
|
*/
|
|
6
6
|
import { mkdirSync, writeFileSync } from "node:fs";
|
|
7
7
|
import { join } from "node:path";
|
package/cli/selftune/index.ts
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
* selftune ingest <agent> — Ingest agent sessions (claude, codex, opencode, openclaw, wrap-codex)
|
|
7
7
|
* selftune grade [mode] — Grade skill sessions (auto, baseline)
|
|
8
8
|
* selftune evolve [target] — Evolve skill descriptions (body, rollback)
|
|
9
|
-
* selftune eval <action> — Evaluation tools (generate, unit-test, import, composability)
|
|
9
|
+
* selftune eval <action> — Evaluation tools (generate, unit-test, import, composability, family-overlap)
|
|
10
10
|
* selftune sync — Sync source-truth telemetry across supported agents
|
|
11
11
|
* selftune orchestrate — Run autonomous core loop (sync → status → evolve → watch)
|
|
12
12
|
* selftune init — Initialize agent identity and config
|
|
@@ -19,11 +19,14 @@
|
|
|
19
19
|
* selftune cron — Scheduling & automation (setup, list, remove)
|
|
20
20
|
* selftune badge — Generate skill health badges for READMEs
|
|
21
21
|
* selftune contribute — Export anonymized skill data for community
|
|
22
|
+
* selftune contributions — Manage creator-directed sharing preferences
|
|
23
|
+
* selftune creator-contributions — Manage creator-side contribution configs
|
|
22
24
|
* selftune workflows — Discover and manage multi-skill workflows
|
|
23
25
|
* selftune quickstart — Guided onboarding: init, ingest, status, and suggestions
|
|
24
26
|
* selftune repair-skill-usage — Rebuild trustworthy skill usage from transcripts
|
|
25
|
-
* selftune export — Export SQLite data to JSONL
|
|
27
|
+
* selftune export — Export SQLite data to JSONL snapshots
|
|
26
28
|
* selftune export-canonical — Export canonical telemetry for downstream ingestion
|
|
29
|
+
* selftune recover — Recover SQLite from legacy/exported JSONL
|
|
27
30
|
* selftune telemetry — Manage anonymous usage analytics (status, enable, disable)
|
|
28
31
|
* selftune alpha <subcommand> — Alpha program management (upload)
|
|
29
32
|
* selftune hook <name> — Run a hook by name (prompt-log, session-stop, etc.)
|
|
@@ -46,7 +49,7 @@ Commands:
|
|
|
46
49
|
ingest <agent> Ingest agent sessions (claude, codex, opencode, openclaw, wrap-codex)
|
|
47
50
|
grade [mode] Grade skill sessions (auto, baseline)
|
|
48
51
|
evolve [target] Evolve skill descriptions (body, rollback)
|
|
49
|
-
eval <action> Evaluation tools (generate, unit-test, import, composability)
|
|
52
|
+
eval <action> Evaluation tools (generate, unit-test, import, composability, family-overlap)
|
|
50
53
|
sync Sync source-truth telemetry across supported agents
|
|
51
54
|
orchestrate Run autonomous core loop (sync → status → evolve → watch)
|
|
52
55
|
init Initialize agent identity and config
|
|
@@ -59,11 +62,14 @@ Commands:
|
|
|
59
62
|
cron Scheduling & automation (setup, list, remove)
|
|
60
63
|
badge Generate skill health badges for READMEs
|
|
61
64
|
contribute Export anonymized skill data for community
|
|
65
|
+
contributions Manage creator-directed sharing preferences
|
|
66
|
+
creator-contributions Manage creator-side contribution configs
|
|
62
67
|
workflows Discover and manage multi-skill workflows
|
|
63
68
|
quickstart Guided onboarding: init, ingest, status, and suggestions
|
|
64
69
|
repair-skill-usage Rebuild trustworthy skill usage from transcripts
|
|
65
|
-
export Export SQLite data to JSONL
|
|
70
|
+
export Export SQLite data to JSONL snapshots
|
|
66
71
|
export-canonical Export canonical telemetry for downstream ingestion
|
|
72
|
+
recover Recover SQLite from legacy/exported JSONL
|
|
67
73
|
alpha <subcommand> Alpha program management (upload)
|
|
68
74
|
telemetry Manage anonymous usage analytics (status, enable, disable)
|
|
69
75
|
hook <name> Run a hook by name (prompt-log, session-stop, etc.)
|
|
@@ -254,6 +260,7 @@ Actions:
|
|
|
254
260
|
unit-test Run or generate skill unit tests
|
|
255
261
|
import Import SkillsBench task corpus as eval entries
|
|
256
262
|
composability Analyze skill co-occurrence conflicts
|
|
263
|
+
family-overlap Detect sibling-skill overlap and consolidation pressure
|
|
257
264
|
|
|
258
265
|
Run 'selftune eval <action> --help' for action-specific options.`);
|
|
259
266
|
process.exit(0);
|
|
@@ -341,6 +348,17 @@ Run 'selftune eval <action> --help' for action-specific options.`);
|
|
|
341
348
|
console.log(JSON.stringify(report, null, 2));
|
|
342
349
|
break;
|
|
343
350
|
}
|
|
351
|
+
case "family-overlap": {
|
|
352
|
+
if (process.argv[2] === "--help" || process.argv[2] === "-h") {
|
|
353
|
+
console.log(
|
|
354
|
+
"selftune eval family-overlap --prefix <family-> | --skills <a,b,c> [--parent-skill <name>] [--min-overlap 0.3] [--min-shared 2]",
|
|
355
|
+
);
|
|
356
|
+
process.exit(0);
|
|
357
|
+
}
|
|
358
|
+
const { cliMain } = await import("./eval/family-overlap.js");
|
|
359
|
+
await cliMain();
|
|
360
|
+
break;
|
|
361
|
+
}
|
|
344
362
|
default:
|
|
345
363
|
throw new CLIError(
|
|
346
364
|
`Unknown eval action: ${sub}`,
|
|
@@ -368,6 +386,16 @@ Run 'selftune eval <action> --help' for action-specific options.`);
|
|
|
368
386
|
await cliMain();
|
|
369
387
|
break;
|
|
370
388
|
}
|
|
389
|
+
case "contributions": {
|
|
390
|
+
const { cliMain } = await import("./contributions.js");
|
|
391
|
+
await cliMain();
|
|
392
|
+
break;
|
|
393
|
+
}
|
|
394
|
+
case "creator-contributions": {
|
|
395
|
+
const { cliMain } = await import("./creator-contributions.js");
|
|
396
|
+
await cliMain();
|
|
397
|
+
break;
|
|
398
|
+
}
|
|
371
399
|
case "watch": {
|
|
372
400
|
const { cliMain } = await import("./monitoring/watch.js");
|
|
373
401
|
await cliMain();
|
|
@@ -527,11 +555,14 @@ Run 'selftune cron <subcommand> --help' for subcommand-specific options.`);
|
|
|
527
555
|
throw new CLIError(`Invalid arguments: ${message}`, "INVALID_FLAG", "selftune export --help");
|
|
528
556
|
}
|
|
529
557
|
if (values.help) {
|
|
530
|
-
console.log(`selftune export — Export SQLite data to JSONL
|
|
558
|
+
console.log(`selftune export — Export SQLite data to JSONL snapshots
|
|
531
559
|
|
|
532
560
|
Usage:
|
|
533
561
|
selftune export [tables...] [options]
|
|
534
562
|
|
|
563
|
+
Use this for portability, debugging, contribute flows, or explicit recovery
|
|
564
|
+
snapshots. Normal runtime reads and writes stay in SQLite.
|
|
565
|
+
|
|
535
566
|
Tables (default: all):
|
|
536
567
|
telemetry Session telemetry records
|
|
537
568
|
skills Skill usage records
|
|
@@ -570,6 +601,11 @@ Options:
|
|
|
570
601
|
cliMain();
|
|
571
602
|
break;
|
|
572
603
|
}
|
|
604
|
+
case "recover": {
|
|
605
|
+
const { cliMain } = await import("./recover.js");
|
|
606
|
+
cliMain();
|
|
607
|
+
break;
|
|
608
|
+
}
|
|
573
609
|
case "orchestrate": {
|
|
574
610
|
const { cliMain } = await import("./orchestrate.js");
|
|
575
611
|
await cliMain();
|
|
@@ -52,9 +52,9 @@ import type {
|
|
|
52
52
|
import { handleCLIError } from "../utils/cli-error.js";
|
|
53
53
|
import { loadMarker, saveMarker } from "../utils/jsonl.js";
|
|
54
54
|
import { extractActionableQueryText } from "../utils/query-filter.js";
|
|
55
|
+
import { getInternalPromptTargetSkill, isWrappedNonUserPart } from "../utils/skill-detection.js";
|
|
55
56
|
import {
|
|
56
57
|
classifySkillPath,
|
|
57
|
-
containsWholeSkillMention,
|
|
58
58
|
extractExplicitSkillMentions,
|
|
59
59
|
extractSkillNamesFromInstructions,
|
|
60
60
|
extractSkillNamesFromPathReferences,
|
|
@@ -228,6 +228,15 @@ export function parseRolloutFile(path: string, skillNames: Set<string>): ParsedR
|
|
|
228
228
|
let observedCwd: string | undefined;
|
|
229
229
|
const sessionSkillNames = new Set(skillNames);
|
|
230
230
|
let hasActionablePrompt = false;
|
|
231
|
+
const markSkillTriggered = (skillName: string, evidence: "explicit" | "inferred"): void => {
|
|
232
|
+
if (!skillsTriggered.includes(skillName)) {
|
|
233
|
+
skillsTriggered.push(skillName);
|
|
234
|
+
}
|
|
235
|
+
const existingEvidence = skillEvidence.get(skillName);
|
|
236
|
+
if (existingEvidence !== "explicit") {
|
|
237
|
+
skillEvidence.set(skillName, evidence);
|
|
238
|
+
}
|
|
239
|
+
};
|
|
231
240
|
const rememberSessionSkillNames = (text: unknown): void => {
|
|
232
241
|
if (typeof text !== "string" || !text) return;
|
|
233
242
|
for (const skillName of extractSkillNamesFromInstructions(text, sessionSkillNames)) {
|
|
@@ -240,33 +249,23 @@ export function parseRolloutFile(path: string, skillNames: Set<string>): ParsedR
|
|
|
240
249
|
sessionSkillNames.add(skillName);
|
|
241
250
|
}
|
|
242
251
|
};
|
|
243
|
-
const detectTriggeredSkills = (text: unknown): void => {
|
|
244
|
-
if (typeof text !== "string" || !text) return;
|
|
245
|
-
for (const skillName of sessionSkillNames) {
|
|
246
|
-
if (containsWholeSkillMention(text, skillName) && !skillsTriggered.includes(skillName)) {
|
|
247
|
-
skillsTriggered.push(skillName);
|
|
248
|
-
}
|
|
249
|
-
if (containsWholeSkillMention(text, skillName) && !skillEvidence.has(skillName)) {
|
|
250
|
-
skillEvidence.set(skillName, "inferred");
|
|
251
|
-
}
|
|
252
|
-
}
|
|
253
|
-
};
|
|
254
252
|
const detectExplicitPromptSkillMentions = (text: unknown): void => {
|
|
255
253
|
if (typeof text !== "string" || !text) return;
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
254
|
+
if (isWrappedNonUserPart(text)) return;
|
|
255
|
+
const actionableText = extractActionableQueryText(text) ?? text;
|
|
256
|
+
const internalTargetSkill = getInternalPromptTargetSkill(actionableText, sessionSkillNames);
|
|
257
|
+
if (internalTargetSkill) {
|
|
258
|
+
markSkillTriggered(internalTargetSkill, "explicit");
|
|
259
|
+
return;
|
|
260
|
+
}
|
|
261
|
+
for (const skillName of extractExplicitSkillMentions(actionableText, sessionSkillNames)) {
|
|
262
|
+
markSkillTriggered(skillName, "explicit");
|
|
261
263
|
}
|
|
262
264
|
};
|
|
263
265
|
const detectExplicitSkillReads = (text: unknown): void => {
|
|
264
266
|
if (typeof text !== "string" || !text) return;
|
|
265
267
|
for (const skillName of extractSkillNamesFromPathReferences(text, sessionSkillNames)) {
|
|
266
|
-
|
|
267
|
-
skillsTriggered.push(skillName);
|
|
268
|
-
}
|
|
269
|
-
skillEvidence.set(skillName, "explicit");
|
|
268
|
+
markSkillTriggered(skillName, "explicit");
|
|
270
269
|
}
|
|
271
270
|
};
|
|
272
271
|
const rememberPromptCandidate = (value: unknown): void => {
|
|
@@ -352,27 +351,26 @@ export function parseRolloutFile(path: string, skillNames: Set<string>): ParsedR
|
|
|
352
351
|
if (itemType === "function_call") {
|
|
353
352
|
const fnName = (payload.name as string) ?? "function_call";
|
|
354
353
|
toolCalls[fnName] = (toolCalls[fnName] ?? 0) + 1;
|
|
355
|
-
//
|
|
354
|
+
// Only path-based skill references count as triggers here.
|
|
356
355
|
detectExplicitSkillReads(payload.arguments);
|
|
357
|
-
detectTriggeredSkills(payload.arguments);
|
|
358
356
|
} else if (itemType === "agent_reasoning") {
|
|
359
357
|
toolCalls.reasoning = (toolCalls.reasoning ?? 0) + 1;
|
|
360
|
-
detectTriggeredSkills(payload.text);
|
|
361
358
|
} else if (itemType === "message") {
|
|
362
|
-
const
|
|
359
|
+
const parts = Array.isArray(payload.content)
|
|
363
360
|
? payload.content
|
|
364
361
|
.map((part) =>
|
|
365
362
|
typeof part === "object" && part
|
|
366
363
|
? (((part as Record<string, unknown>).text as string | undefined) ?? "")
|
|
367
364
|
: "",
|
|
368
365
|
)
|
|
369
|
-
.
|
|
370
|
-
:
|
|
366
|
+
.filter(Boolean)
|
|
367
|
+
: [];
|
|
368
|
+
const content = parts.join("\n");
|
|
371
369
|
rememberSessionSkillNames(content);
|
|
372
|
-
if ((payload.role as string) === "
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
370
|
+
if ((payload.role as string) === "user") {
|
|
371
|
+
for (const part of parts) {
|
|
372
|
+
detectExplicitPromptSkillMentions(part);
|
|
373
|
+
}
|
|
376
374
|
}
|
|
377
375
|
}
|
|
378
376
|
} else if (etype === "turn.started") {
|
|
@@ -410,10 +408,8 @@ export function parseRolloutFile(path: string, skillNames: Set<string>): ParsedR
|
|
|
410
408
|
}
|
|
411
409
|
|
|
412
410
|
// Detect skill names in text content on completed events
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
if (etype === "item.completed") {
|
|
416
|
-
detectTriggeredSkills(textContent);
|
|
411
|
+
if (itemType === "command_execution") {
|
|
412
|
+
detectExplicitSkillReads(item.command);
|
|
417
413
|
}
|
|
418
414
|
} else if (etype === "error") {
|
|
419
415
|
errors += 1;
|
|
@@ -41,11 +41,13 @@ import type {
|
|
|
41
41
|
SessionTelemetryRecord,
|
|
42
42
|
SkillUsageRecord,
|
|
43
43
|
} from "../types.js";
|
|
44
|
+
import { extractActionableQueryText } from "../utils/query-filter.js";
|
|
45
|
+
import { getInternalPromptTargetSkill, isWrappedNonUserPart } from "../utils/skill-detection.js";
|
|
44
46
|
import {
|
|
45
47
|
classifySkillPath,
|
|
46
|
-
containsWholeSkillMention,
|
|
47
48
|
extractExplicitSkillMentions,
|
|
48
49
|
extractSkillNamesFromInstructions,
|
|
50
|
+
extractSkillNamesFromPathReferences,
|
|
49
51
|
findInstalledSkillNames,
|
|
50
52
|
findInstalledSkillPath,
|
|
51
53
|
findRepositorySkillDirs,
|
|
@@ -112,26 +114,34 @@ export function parseJsonlStream(lines: string[], skillNames: Set<string>): Pars
|
|
|
112
114
|
let outputTokens = 0;
|
|
113
115
|
const agentMessages: string[] = [];
|
|
114
116
|
const sessionSkillNames = new Set(skillNames);
|
|
117
|
+
const markSkillTriggered = (skillName: string): void => {
|
|
118
|
+
if (!skillsTriggered.includes(skillName)) {
|
|
119
|
+
skillsTriggered.push(skillName);
|
|
120
|
+
}
|
|
121
|
+
};
|
|
115
122
|
const rememberSessionSkillNames = (text: unknown): void => {
|
|
116
123
|
if (typeof text !== "string" || !text) return;
|
|
117
124
|
for (const skillName of extractSkillNamesFromInstructions(text, sessionSkillNames)) {
|
|
118
125
|
sessionSkillNames.add(skillName);
|
|
119
126
|
}
|
|
120
127
|
};
|
|
121
|
-
const
|
|
128
|
+
const detectExplicitSkillReads = (text: unknown): void => {
|
|
122
129
|
if (typeof text !== "string" || !text) return;
|
|
123
|
-
for (const skillName of sessionSkillNames) {
|
|
124
|
-
|
|
125
|
-
skillsTriggered.push(skillName);
|
|
126
|
-
}
|
|
130
|
+
for (const skillName of extractSkillNamesFromPathReferences(text, sessionSkillNames)) {
|
|
131
|
+
markSkillTriggered(skillName);
|
|
127
132
|
}
|
|
128
133
|
};
|
|
129
134
|
const detectExplicitPromptSkillMentions = (text: unknown): void => {
|
|
130
135
|
if (typeof text !== "string" || !text) return;
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
136
|
+
if (isWrappedNonUserPart(text)) return;
|
|
137
|
+
const actionableText = extractActionableQueryText(text) ?? text;
|
|
138
|
+
const internalTargetSkill = getInternalPromptTargetSkill(actionableText, sessionSkillNames);
|
|
139
|
+
if (internalTargetSkill) {
|
|
140
|
+
markSkillTriggered(internalTargetSkill);
|
|
141
|
+
return;
|
|
142
|
+
}
|
|
143
|
+
for (const skillName of extractExplicitSkillMentions(actionableText, sessionSkillNames)) {
|
|
144
|
+
markSkillTriggered(skillName);
|
|
135
145
|
}
|
|
136
146
|
};
|
|
137
147
|
|
|
@@ -187,40 +197,38 @@ export function parseJsonlStream(lines: string[], skillNames: Set<string>): Pars
|
|
|
187
197
|
} else if (itemType === "agent_message") {
|
|
188
198
|
const text = (item.text as string) ?? "";
|
|
189
199
|
if (text) agentMessages.push(text.slice(0, 500));
|
|
190
|
-
detectTriggeredSkills(text);
|
|
191
200
|
} else if (itemType === "reasoning") {
|
|
192
201
|
toolCalls.reasoning = (toolCalls.reasoning ?? 0) + 1;
|
|
193
202
|
}
|
|
194
203
|
}
|
|
195
204
|
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
if (etype === "item.completed") {
|
|
199
|
-
detectTriggeredSkills(textContent);
|
|
205
|
+
if (etype === "item.completed" && itemType === "command_execution") {
|
|
206
|
+
detectExplicitSkillReads(item.command);
|
|
200
207
|
}
|
|
201
208
|
} else if (etype === "response_item") {
|
|
202
209
|
const payload = (event.payload as Record<string, unknown>) ?? {};
|
|
203
210
|
const itemType = (payload.type as string) ?? "";
|
|
204
211
|
if (itemType === "function_call") {
|
|
205
|
-
|
|
212
|
+
detectExplicitSkillReads(payload.arguments);
|
|
206
213
|
} else if (itemType === "message") {
|
|
207
|
-
const
|
|
214
|
+
const parts = Array.isArray(payload.content)
|
|
208
215
|
? payload.content
|
|
209
216
|
.map((part) =>
|
|
210
217
|
typeof part === "object" && part
|
|
211
218
|
? (((part as Record<string, unknown>).text as string | undefined) ?? "")
|
|
212
219
|
: "",
|
|
213
220
|
)
|
|
214
|
-
.
|
|
215
|
-
:
|
|
221
|
+
.filter(Boolean)
|
|
222
|
+
: [];
|
|
223
|
+
const content = parts.join("\n");
|
|
216
224
|
rememberSessionSkillNames(content);
|
|
217
|
-
if ((payload.role as string) === "
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
225
|
+
if ((payload.role as string) === "user") {
|
|
226
|
+
for (const part of parts) {
|
|
227
|
+
detectExplicitPromptSkillMentions(part);
|
|
228
|
+
}
|
|
221
229
|
}
|
|
222
230
|
} else if (itemType === "agent_reasoning") {
|
|
223
|
-
|
|
231
|
+
detectExplicitSkillReads(payload.text);
|
|
224
232
|
}
|
|
225
233
|
} else if (etype === "error") {
|
|
226
234
|
errors += 1;
|
|
@@ -54,7 +54,7 @@ export function openDb(dbPath: string = DB_PATH): Database {
|
|
|
54
54
|
const msg = err instanceof Error ? err.message : String(err);
|
|
55
55
|
if (msg.includes("duplicate column")) continue; // expected on subsequent runs
|
|
56
56
|
throw new Error(
|
|
57
|
-
`Schema migration failed: ${msg}. Export first with 'selftune export', then remove '${dbPath}' and rerun 'selftune sync --force'
|
|
57
|
+
`Schema migration failed: ${msg}. Export first with 'selftune export', then remove '${dbPath}' and rerun 'selftune sync --force'. If you need legacy/export JSONL backfill, run 'selftune recover --full --force'.`,
|
|
58
58
|
);
|
|
59
59
|
}
|
|
60
60
|
}
|
|
@@ -67,7 +67,7 @@ export function openDb(dbPath: string = DB_PATH): Database {
|
|
|
67
67
|
const msg = err instanceof Error ? err.message : String(err);
|
|
68
68
|
if (msg.includes("already exists")) continue; // expected on subsequent runs
|
|
69
69
|
throw new Error(
|
|
70
|
-
`Schema index creation failed: ${msg}. Export first with 'selftune export', then remove '${dbPath}' and rerun 'selftune sync --force'
|
|
70
|
+
`Schema index creation failed: ${msg}. Export first with 'selftune export', then remove '${dbPath}' and rerun 'selftune sync --force'. If you need legacy/export JSONL backfill, run 'selftune recover --full --force'.`,
|
|
71
71
|
);
|
|
72
72
|
}
|
|
73
73
|
}
|
|
@@ -285,11 +285,12 @@ export function writeEvolutionAuditToDb(record: EvolutionAuditEntry): boolean {
|
|
|
285
285
|
return safeWrite("evolution-audit", (db) => {
|
|
286
286
|
getStmt(
|
|
287
287
|
db,
|
|
288
|
-
"evolution-audit-
|
|
288
|
+
"evolution-audit-v3",
|
|
289
289
|
`
|
|
290
290
|
INSERT OR IGNORE INTO evolution_audit
|
|
291
|
-
(timestamp, proposal_id, skill_name, action, details, eval_snapshot_json, iterations_used
|
|
292
|
-
|
|
291
|
+
(timestamp, proposal_id, skill_name, action, details, eval_snapshot_json, iterations_used,
|
|
292
|
+
validation_mode, validation_agent, validation_fixture_id, validation_evidence_ref)
|
|
293
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
293
294
|
`,
|
|
294
295
|
).run(
|
|
295
296
|
record.timestamp,
|
|
@@ -299,6 +300,10 @@ export function writeEvolutionAuditToDb(record: EvolutionAuditEntry): boolean {
|
|
|
299
300
|
record.details,
|
|
300
301
|
record.eval_snapshot ? JSON.stringify(record.eval_snapshot) : null,
|
|
301
302
|
record.iterations_used ?? null,
|
|
303
|
+
record.validation_mode ?? null,
|
|
304
|
+
record.validation_agent ?? null,
|
|
305
|
+
record.validation_fixture_id ?? null,
|
|
306
|
+
record.validation_evidence_ref ?? null,
|
|
302
307
|
);
|
|
303
308
|
});
|
|
304
309
|
}
|
|
@@ -600,8 +600,9 @@ function insertEvolutionAudit(db: Database, records: EvolutionAuditEntry[]): num
|
|
|
600
600
|
// (idx_evo_audit_dedup defined in schema.ts).
|
|
601
601
|
const stmt = db.prepare(`
|
|
602
602
|
INSERT OR IGNORE INTO evolution_audit
|
|
603
|
-
(timestamp, proposal_id, skill_name, action, details, eval_snapshot_json, iterations_used
|
|
604
|
-
|
|
603
|
+
(timestamp, proposal_id, skill_name, action, details, eval_snapshot_json, iterations_used,
|
|
604
|
+
validation_mode, validation_agent, validation_fixture_id, validation_evidence_ref)
|
|
605
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
605
606
|
`);
|
|
606
607
|
|
|
607
608
|
let count = 0;
|
|
@@ -614,6 +615,10 @@ function insertEvolutionAudit(db: Database, records: EvolutionAuditEntry[]): num
|
|
|
614
615
|
r.details,
|
|
615
616
|
r.eval_snapshot ? JSON.stringify(r.eval_snapshot) : null,
|
|
616
617
|
r.iterations_used ?? null,
|
|
618
|
+
r.validation_mode ?? null,
|
|
619
|
+
r.validation_agent ?? null,
|
|
620
|
+
r.validation_fixture_id ?? null,
|
|
621
|
+
r.validation_evidence_ref ?? null,
|
|
617
622
|
);
|
|
618
623
|
count++;
|
|
619
624
|
}
|