selftune 0.2.18 → 0.2.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -4
- package/apps/local-dashboard/dist/assets/index-D8O-RG1I.js +60 -0
- package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-table-BIiI3YhS.js +1 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +12 -0
- package/apps/local-dashboard/dist/index.html +5 -5
- package/cli/selftune/alpha-upload/stage-canonical.ts +7 -6
- package/cli/selftune/constants.ts +10 -0
- package/cli/selftune/contribute/contribute.ts +30 -2
- package/cli/selftune/contribution-config.ts +249 -0
- package/cli/selftune/contribution-relay.ts +177 -0
- package/cli/selftune/contribution-signals.ts +219 -0
- package/cli/selftune/contribution-staging.ts +147 -0
- package/cli/selftune/contributions.ts +532 -0
- package/cli/selftune/creator-contributions.ts +333 -0
- package/cli/selftune/dashboard-contract.ts +209 -1
- package/cli/selftune/dashboard-server.ts +45 -11
- package/cli/selftune/eval/family-overlap.ts +714 -0
- package/cli/selftune/eval/hooks-to-evals.ts +182 -28
- package/cli/selftune/eval/synthetic-evals.ts +298 -11
- package/cli/selftune/evolution/evidence.ts +5 -0
- package/cli/selftune/evolution/evolve-body.ts +62 -2
- package/cli/selftune/evolution/evolve.ts +58 -1
- package/cli/selftune/evolution/validate-body.ts +10 -0
- package/cli/selftune/evolution/validate-host-replay.ts +236 -0
- package/cli/selftune/evolution/validate-proposal.ts +10 -0
- package/cli/selftune/evolution/validate-routing.ts +112 -5
- package/cli/selftune/export.ts +2 -2
- package/cli/selftune/index.ts +41 -5
- package/cli/selftune/ingestors/codex-rollout.ts +31 -35
- package/cli/selftune/ingestors/codex-wrapper.ts +32 -24
- package/cli/selftune/localdb/db.ts +2 -2
- package/cli/selftune/localdb/direct-write.ts +8 -3
- package/cli/selftune/localdb/materialize.ts +7 -2
- package/cli/selftune/localdb/queries.ts +712 -31
- package/cli/selftune/localdb/schema.ts +30 -1
- package/cli/selftune/recover.ts +153 -0
- package/cli/selftune/repair/skill-usage.ts +363 -4
- package/cli/selftune/routes/actions.ts +35 -1
- package/cli/selftune/routes/analytics.ts +14 -0
- package/cli/selftune/routes/index.ts +1 -0
- package/cli/selftune/routes/overview.ts +112 -4
- package/cli/selftune/routes/skill-report.ts +575 -11
- package/cli/selftune/status.ts +81 -2
- package/cli/selftune/sync.ts +56 -2
- package/cli/selftune/trust-model.ts +66 -0
- package/cli/selftune/types.ts +103 -0
- package/cli/selftune/utils/skill-detection.ts +43 -0
- package/cli/selftune/utils/text-similarity.ts +73 -0
- package/cli/selftune/watchlist.ts +65 -0
- package/package.json +1 -1
- package/packages/ui/src/components/ActivityTimeline.tsx +165 -150
- package/packages/ui/src/components/EvidenceViewer.tsx +419 -145
- package/packages/ui/src/components/EvolutionTimeline.tsx +81 -29
- package/packages/ui/src/components/OrchestrateRunsPanel.tsx +33 -16
- package/packages/ui/src/components/RecentActivityFeed.tsx +72 -41
- package/packages/ui/src/components/section-cards.tsx +12 -9
- package/packages/ui/src/primitives/card.tsx +1 -1
- package/packages/ui/src/types.ts +4 -0
- package/skill/SKILL.md +11 -1
- package/skill/Workflows/AlphaUpload.md +4 -0
- package/skill/Workflows/Composability.md +78 -0
- package/skill/Workflows/Contribute.md +6 -3
- package/skill/Workflows/Contributions.md +97 -0
- package/skill/Workflows/CreatorContributions.md +74 -0
- package/skill/Workflows/Dashboard.md +31 -0
- package/skill/Workflows/Evals.md +57 -8
- package/skill/Workflows/Evolve.md +23 -0
- package/skill/Workflows/Ingest.md +7 -0
- package/skill/Workflows/Initialize.md +20 -1
- package/skill/Workflows/Recover.md +84 -0
- package/skill/Workflows/RepairSkillUsage.md +12 -4
- package/skill/Workflows/Sync.md +18 -12
- package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +0 -2
- package/apps/local-dashboard/dist/assets/index-DOu3iLD9.js +0 -16
- package/apps/local-dashboard/dist/assets/vendor-table-pHbDxq36.js +0 -8
- package/apps/local-dashboard/dist/assets/vendor-ui-DIwlrGlb.js +0 -12
package/cli/selftune/status.ts
CHANGED
|
@@ -20,13 +20,16 @@ import { getDb } from "./localdb/db.js";
|
|
|
20
20
|
import {
|
|
21
21
|
getLastUploadError,
|
|
22
22
|
getLastUploadSuccess,
|
|
23
|
+
getSkillTrustSummaries,
|
|
23
24
|
queryEvolutionAudit,
|
|
24
25
|
queryQueryLog,
|
|
25
26
|
querySessionTelemetry,
|
|
26
27
|
querySkillUsageRecords,
|
|
28
|
+
type SkillTrustSummary,
|
|
27
29
|
} from "./localdb/queries.js";
|
|
28
30
|
import { computeMonitoringSnapshot, MIN_MONITORING_SKILL_CHECKS } from "./monitoring/watch.js";
|
|
29
31
|
import { doctor } from "./observability.js";
|
|
32
|
+
import { deriveTrustBucket, deriveTrustBucketReason } from "./trust-model.js";
|
|
30
33
|
import type {
|
|
31
34
|
AgentCommandGuidance,
|
|
32
35
|
AlphaLinkState,
|
|
@@ -273,7 +276,44 @@ const TREND_SYMBOLS: Record<string, string> = {
|
|
|
273
276
|
unknown: "?",
|
|
274
277
|
};
|
|
275
278
|
|
|
276
|
-
|
|
279
|
+
/**
 * Build the optional "Highlights" section of the status output.
 *
 * Skills are grouped by `deriveTrustBucket`; only the "at_risk" and
 * "improving" buckets are shown. Each group keeps at most three skills,
 * ordered by descending `last_seen` string comparison (a missing value
 * compares as the empty string).
 *
 * @param trustSummaries - Per-skill trust summaries; may be absent.
 * @returns `[]` when there are no summaries or neither bucket has a member;
 *   otherwise a "Highlights" header followed by one line per non-empty group.
 */
function formatTrustHighlights(trustSummaries: SkillTrustSummary[] | undefined): string[] {
  if (!trustSummaries?.length) return [];

  const newestFirst = (left: SkillTrustSummary, right: SkillTrustSummary): number =>
    (right.last_seen ?? "").localeCompare(left.last_seen ?? "");

  // `filter` already yields a fresh array, so sorting never touches the caller's list.
  const topThree = (bucket: "at_risk" | "improving"): SkillTrustSummary[] =>
    trustSummaries
      .filter((entry) => deriveTrustBucket(entry) === bucket)
      .sort(newestFirst)
      .slice(0, 3);

  const renderGroup = (
    prefix: string,
    bucket: "at_risk" | "improving",
    entries: SkillTrustSummary[],
  ): string => {
    const described = entries.map(
      (entry) => `${entry.skill_name} (${deriveTrustBucketReason(bucket, entry)})`,
    );
    return `${prefix}${described.join("; ")}`;
  };

  const attention = topThree("at_risk");
  const improving = topThree("improving");
  if (attention.length === 0 && improving.length === 0) return [];

  const output = ["Highlights"];
  if (attention.length > 0) {
    output.push(renderGroup(" Attention: ", "at_risk", attention));
  }
  if (improving.length > 0) {
    output.push(renderGroup(" Improving: ", "improving", improving));
  }
  return output;
}
|
|
315
|
+
|
|
316
|
+
export function formatStatus(result: StatusResult, trustSummaries?: SkillTrustSummary[]): string {
|
|
277
317
|
const noColor = !!process.env.NO_COLOR;
|
|
278
318
|
|
|
279
319
|
const green = noColor ? (s: string) => s : (s: string) => colorize(s, "#788c5d");
|
|
@@ -284,6 +324,14 @@ export function formatStatus(result: StatusResult): string {
|
|
|
284
324
|
lines.push("selftune status");
|
|
285
325
|
lines.push("\u2550".repeat(15));
|
|
286
326
|
lines.push("");
|
|
327
|
+
lines.push(formatStatusSummary(result, trustSummaries));
|
|
328
|
+
lines.push("");
|
|
329
|
+
|
|
330
|
+
const highlightLines = formatTrustHighlights(trustSummaries);
|
|
331
|
+
if (highlightLines.length > 0) {
|
|
332
|
+
lines.push(...highlightLines);
|
|
333
|
+
lines.push("");
|
|
334
|
+
}
|
|
287
335
|
|
|
288
336
|
// Skills table
|
|
289
337
|
const skillCount = result.skills.length;
|
|
@@ -351,6 +399,36 @@ export function formatStatus(result: StatusResult): string {
|
|
|
351
399
|
return lines.join("\n");
|
|
352
400
|
}
|
|
353
401
|
|
|
402
|
+
/**
 * Render the one-line status summary: "<watched> | <improving> | <attention>".
 *
 * When `trustSummaries` is provided, the counts come from `deriveTrustBucket`
 * ("improving" / "at_risk"). When it is absent, they fall back to
 * `result.skills`: `trend === "up"` counts as improving, and a status of
 * "WARNING" or "CRITICAL" counts as needing attention.
 *
 * @param result - Computed status; `skills` and `lastSession` are read.
 * @param trustSummaries - Optional per-skill trust summaries.
 * @returns The three segments joined with " | ".
 */
export function formatStatusSummary(
  result: StatusResult,
  trustSummaries?: SkillTrustSummary[],
): string {
  let watched: number;
  let improving: number;
  let needsAttention: number;

  if (trustSummaries != null) {
    const buckets = trustSummaries.map((summary) => deriveTrustBucket(summary));
    watched = trustSummaries.length;
    improving = buckets.filter((bucket) => bucket === "improving").length;
    needsAttention = buckets.filter((bucket) => bucket === "at_risk").length;
  } else {
    watched = result.skills.length;
    improving = result.skills.filter((skill) => skill.trend === "up").length;
    needsAttention = result.skills.filter(
      (skill) => skill.status === "WARNING" || skill.status === "CRITICAL",
    ).length;
  }

  const segments: string[] = [`${watched} ${watched === 1 ? "skill" : "skills"} watched`];

  if (improving > 0) {
    segments.push(`${improving} improving`);
  } else {
    // Tell "sessions exist but nothing improved" apart from "no session recorded".
    segments.push(result.lastSession ? "no recent lift" : "no recent data");
  }

  if (needsAttention > 0) {
    segments.push(`${needsAttention} needing attention`);
  } else {
    segments.push(watched > 0 ? "nothing urgent" : "nothing tracked yet");
  }

  return segments.join(" | ");
}
|
|
431
|
+
|
|
354
432
|
// ---------------------------------------------------------------------------
|
|
355
433
|
// Terminal color helper using ANSI escapes
|
|
356
434
|
// ---------------------------------------------------------------------------
|
|
@@ -506,7 +584,8 @@ export async function cliMain(): Promise<void> {
|
|
|
506
584
|
const doctorResult = await doctor();
|
|
507
585
|
|
|
508
586
|
const result = computeStatus(telemetry, skillRecords, queryRecords, auditEntries, doctorResult);
|
|
509
|
-
const output = formatStatus(result);
|
|
587
|
+
const trustSummaries = getSkillTrustSummaries(db);
|
|
588
|
+
const output = formatStatus(result, trustSummaries);
|
|
510
589
|
console.log(output);
|
|
511
590
|
|
|
512
591
|
// Alpha upload status section
|
package/cli/selftune/sync.ts
CHANGED
|
@@ -31,6 +31,7 @@ import {
|
|
|
31
31
|
SKILL_LOG,
|
|
32
32
|
TELEMETRY_LOG,
|
|
33
33
|
} from "./constants.js";
|
|
34
|
+
import { stageCreatorContributionSignals } from "./contribution-staging.js";
|
|
34
35
|
import {
|
|
35
36
|
findTranscriptFiles,
|
|
36
37
|
parseSession,
|
|
@@ -58,6 +59,7 @@ import {
|
|
|
58
59
|
import { getDb } from "./localdb/db.js";
|
|
59
60
|
import { querySkillUsageRecords } from "./localdb/queries.js";
|
|
60
61
|
import {
|
|
62
|
+
persistRepairedSkillUsageToDb,
|
|
61
63
|
rebuildSkillUsageFromCodexRollouts,
|
|
62
64
|
rebuildSkillUsageFromTranscripts,
|
|
63
65
|
} from "./repair/skill-usage.js";
|
|
@@ -96,6 +98,12 @@ export interface SyncResult {
|
|
|
96
98
|
repaired_records: number;
|
|
97
99
|
codex_repaired_records: number;
|
|
98
100
|
};
|
|
101
|
+
creator_contributions: {
|
|
102
|
+
ran: boolean;
|
|
103
|
+
eligible_skills: number;
|
|
104
|
+
built_signals: number;
|
|
105
|
+
staged_signals: number;
|
|
106
|
+
};
|
|
99
107
|
timings: SyncPhaseTiming[];
|
|
100
108
|
total_elapsed_ms: number;
|
|
101
109
|
}
|
|
@@ -130,6 +138,14 @@ export interface SyncDeps {
|
|
|
130
138
|
repairedRecords: number;
|
|
131
139
|
codexRepairedRecords: number;
|
|
132
140
|
};
|
|
141
|
+
stageCreatorContributions?: (
|
|
142
|
+
db: ReturnType<typeof getDb>,
|
|
143
|
+
options: { dryRun: boolean },
|
|
144
|
+
) => {
|
|
145
|
+
eligible_skills: number;
|
|
146
|
+
built_signals: number;
|
|
147
|
+
staged_signals: number;
|
|
148
|
+
};
|
|
133
149
|
}
|
|
134
150
|
|
|
135
151
|
export function createDefaultSyncOptions(overrides: Partial<SyncOptions> = {}): SyncOptions {
|
|
@@ -344,6 +360,7 @@ function rebuildSkillUsageOverlay(
|
|
|
344
360
|
options: SyncOptions,
|
|
345
361
|
onProgress?: SyncProgressCallback,
|
|
346
362
|
cache?: FileListCache,
|
|
363
|
+
db: ReturnType<typeof getDb> = getDb(),
|
|
347
364
|
): {
|
|
348
365
|
repairedSessions: number;
|
|
349
366
|
repairedRecords: number;
|
|
@@ -363,7 +380,6 @@ function rebuildSkillUsageOverlay(
|
|
|
363
380
|
let rawSkillRecords: SkillUsageRecord[];
|
|
364
381
|
if (options.skillLogPath === SKILL_LOG) {
|
|
365
382
|
try {
|
|
366
|
-
const db = getDb();
|
|
367
383
|
rawSkillRecords = querySkillUsageRecords(db) as SkillUsageRecord[];
|
|
368
384
|
} catch {
|
|
369
385
|
rawSkillRecords = readJsonl<SkillUsageRecord>(options.skillLogPath);
|
|
@@ -389,6 +405,7 @@ function rebuildSkillUsageOverlay(
|
|
|
389
405
|
repairedRecords.push(...codexRecords);
|
|
390
406
|
|
|
391
407
|
if (!options.dryRun) {
|
|
408
|
+
persistRepairedSkillUsageToDb(db, repairedRecords);
|
|
392
409
|
writeRepairedSkillUsageRecords(
|
|
393
410
|
repairedRecords,
|
|
394
411
|
repairedSessionIds,
|
|
@@ -429,6 +446,8 @@ export function syncSources(
|
|
|
429
446
|
const runOpenCode = deps.syncOpenCode;
|
|
430
447
|
const runOpenClaw = deps.syncOpenClaw;
|
|
431
448
|
const runRepair = deps.rebuildSkillUsage;
|
|
449
|
+
const runCreatorContributions = deps.stageCreatorContributions;
|
|
450
|
+
const db = getDb();
|
|
432
451
|
|
|
433
452
|
const disabledStep: SyncStepResult = { available: false, scanned: 0, synced: 0, skipped: 0 };
|
|
434
453
|
|
|
@@ -470,11 +489,27 @@ export function syncSources(
|
|
|
470
489
|
? timePhase(
|
|
471
490
|
"repair",
|
|
472
491
|
() =>
|
|
473
|
-
runRepair ? runRepair(options) : rebuildSkillUsageOverlay(options, onProgress, cache),
|
|
492
|
+
runRepair ? runRepair(options) : rebuildSkillUsageOverlay(options, onProgress, cache, db),
|
|
474
493
|
timings,
|
|
475
494
|
)
|
|
476
495
|
: { repairedSessions: 0, repairedRecords: 0, codexRepairedRecords: 0 };
|
|
477
496
|
|
|
497
|
+
const creatorContributions = timePhase(
|
|
498
|
+
"creator_contributions",
|
|
499
|
+
() => {
|
|
500
|
+
const staged = runCreatorContributions
|
|
501
|
+
? runCreatorContributions(db, { dryRun: options.dryRun })
|
|
502
|
+
: stageCreatorContributionSignals(db, { dryRun: options.dryRun });
|
|
503
|
+
return {
|
|
504
|
+
ran: true,
|
|
505
|
+
eligible_skills: staged.eligible_skills,
|
|
506
|
+
built_signals: staged.built_signals,
|
|
507
|
+
staged_signals: staged.staged_signals,
|
|
508
|
+
};
|
|
509
|
+
},
|
|
510
|
+
timings,
|
|
511
|
+
);
|
|
512
|
+
|
|
478
513
|
const totalElapsed = Math.round(performance.now() - totalStart);
|
|
479
514
|
|
|
480
515
|
return {
|
|
@@ -487,6 +522,7 @@ export function syncSources(
|
|
|
487
522
|
repaired_records: repair.repairedRecords,
|
|
488
523
|
codex_repaired_records: repair.codexRepairedRecords,
|
|
489
524
|
},
|
|
525
|
+
creator_contributions: creatorContributions,
|
|
490
526
|
timings,
|
|
491
527
|
total_elapsed_ms: totalElapsed,
|
|
492
528
|
};
|
|
@@ -636,6 +672,24 @@ Options:
|
|
|
636
672
|
);
|
|
637
673
|
}
|
|
638
674
|
|
|
675
|
+
if (
|
|
676
|
+
result.creator_contributions.eligible_skills > 0 ||
|
|
677
|
+
result.creator_contributions.built_signals > 0
|
|
678
|
+
) {
|
|
679
|
+
const contributionTiming = timingMap.get("creator_contributions");
|
|
680
|
+
const contributionTime = contributionTiming
|
|
681
|
+
? ` (${formatMs(contributionTiming.elapsed_ms)})`
|
|
682
|
+
: "";
|
|
683
|
+
process.stderr.write(
|
|
684
|
+
`Creator contributions: ${result.creator_contributions.built_signals} signals from ` +
|
|
685
|
+
`${result.creator_contributions.eligible_skills} skills` +
|
|
686
|
+
(result.dry_run
|
|
687
|
+
? " ready to stage"
|
|
688
|
+
: ` staged=${result.creator_contributions.staged_signals}`) +
|
|
689
|
+
`${contributionTime}\n`,
|
|
690
|
+
);
|
|
691
|
+
}
|
|
692
|
+
|
|
639
693
|
process.stderr.write(`\nDone in ${formatMs(result.total_elapsed_ms)}\n`);
|
|
640
694
|
}
|
|
641
695
|
|
|
package/cli/selftune/trust-model.ts
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import type { TrustBucket, TrustState, TrustWatchlistEntry } from "./dashboard-contract.js";
|
|
2
|
+
import type { SkillTrustSummary } from "./localdb/queries.js";
|
|
3
|
+
|
|
4
|
+
/** A `miss_rate` strictly above this value puts a skill in the "at_risk" bucket. */
const AT_RISK_MISS_RATE_THRESHOLD = 0.15;

/** Skills with fewer `total_checks` than this are bucketed "uncertain" by `deriveTrustBucket`. */
const UNCERTAIN_MIN_CHECKS = 10;
|
|
6
|
+
|
|
7
|
+
/**
 * Format a fraction as a percentage with at most one decimal place.
 *
 * A trailing ".0" is dropped, so 0.15 renders as "15%" and 0.123 as "12.3%".
 *
 * @param value - Fraction to format (1 means 100%).
 * @returns The percentage text including the "%" sign.
 */
function formatPercent(value: number): string {
  const oneDecimal = (value * 100).toFixed(1);
  const compact = oneDecimal.endsWith(".0") ? oneDecimal.slice(0, -2) : oneDecimal;
  return `${compact}%`;
}
|
|
10
|
+
|
|
11
|
+
/**
 * Map a skill's trust summary to its trust state.
 *
 * A `latest_action` of "rolled_back", "deployed", "validated" or "watch" is
 * returned as the state unchanged. Any other action falls back to sample
 * size: fewer than 5 checks is "low_sample", otherwise "observed".
 *
 * @param summary - Summary providing `latest_action` and `total_checks`.
 * @returns The derived trust state.
 */
export function deriveTrustState(summary: SkillTrustSummary): TrustState {
  switch (summary.latest_action) {
    case "rolled_back":
      return "rolled_back";
    case "deployed":
      return "deployed";
    case "validated":
      return "validated";
    case "watch":
      return "watch";
    default:
      return summary.total_checks < 5 ? "low_sample" : "observed";
  }
}
|
|
19
|
+
|
|
20
|
+
/**
 * Classify a skill into a trust bucket. Rules are checked in priority order:
 *
 * 1. "at_risk" — latest action is "rolled_back", or `miss_rate` exceeds
 *    `AT_RISK_MISS_RATE_THRESHOLD`.
 * 2. "improving" — latest action is "validated", "created" or "proposed".
 * 3. "uncertain" — fewer than `UNCERTAIN_MIN_CHECKS` checks, or latest action
 *    is "watch".
 * 4. "stable" — everything else.
 *
 * @param summary - Summary providing `latest_action`, `miss_rate`, `total_checks`.
 * @returns The first bucket whose rule matches.
 */
export function deriveTrustBucket(summary: SkillTrustSummary): TrustBucket {
  const { latest_action: action, miss_rate: missRate, total_checks: checks } = summary;

  if (action === "rolled_back" || missRate > AT_RISK_MISS_RATE_THRESHOLD) {
    return "at_risk";
  }

  switch (action) {
    case "validated":
    case "created":
    case "proposed":
      return "improving";
  }

  const thinEvidence = checks < UNCERTAIN_MIN_CHECKS || action === "watch";
  return thinEvidence ? "uncertain" : "stable";
}
|
|
36
|
+
|
|
37
|
+
/**
 * Produce the short human-readable explanation shown next to a trust bucket.
 *
 * The caller supplies the bucket; this function only chooses the wording
 * from the summary's fields.
 *
 * @param bucket - Bucket to explain.
 * @param summary - Summary whose `latest_action`, `miss_rate` and
 *   `total_checks` feed the text.
 * @returns A short phrase describing why the skill is in `bucket`.
 */
export function deriveTrustBucketReason(bucket: TrustBucket, summary: SkillTrustSummary): string {
  switch (bucket) {
    case "at_risk":
      if (summary.latest_action === "rolled_back") return "Recently rolled back";
      return `High miss rate (${formatPercent(summary.miss_rate)})`;
    case "improving":
      if (summary.latest_action === "validated") return "Proposal validated, pending deploy";
      return "Has pending evolution proposal";
    case "uncertain":
      // Use the same threshold as deriveTrustBucket so the wording cannot
      // drift from the bucketing rule if the threshold is tuned later.
      if (summary.total_checks < UNCERTAIN_MIN_CHECKS) {
        return `Low sample size (${summary.total_checks} checks)`;
      }
      return "Under active observation";
    case "stable":
      return "Routing healthy, no issues detected";
  }
}
|
|
52
|
+
|
|
53
|
+
/**
 * Convert trust summaries into watchlist entries, one per summary and in the
 * same order.
 *
 * Each entry carries the derived bucket, trust state and bucket reason next
 * to the summary's `pass_rate`, `total_checks` (as `checks`) and `last_seen`.
 *
 * @param summaries - Per-skill trust summaries.
 * @returns Watchlist entries aligned with `summaries`.
 */
export function buildTrustWatchlist(summaries: SkillTrustSummary[]): TrustWatchlistEntry[] {
  const toEntry = (summary: SkillTrustSummary): TrustWatchlistEntry => {
    const bucket = deriveTrustBucket(summary);
    const trustState = deriveTrustState(summary);
    const reason = deriveTrustBucketReason(bucket, summary);
    return {
      skill_name: summary.skill_name,
      bucket,
      trust_state: trustState,
      reason,
      pass_rate: summary.pass_rate,
      checks: summary.total_checks,
      last_seen: summary.last_seen,
    };
  };
  return summaries.map((summary) => toEntry(summary));
}
|
package/cli/selftune/types.ts
CHANGED
|
@@ -400,6 +400,10 @@ export interface EvolutionAuditEntry {
|
|
|
400
400
|
details: string;
|
|
401
401
|
eval_snapshot?: EvalPassRate;
|
|
402
402
|
iterations_used?: number;
|
|
403
|
+
validation_mode?: ValidationMode;
|
|
404
|
+
validation_agent?: string;
|
|
405
|
+
validation_fixture_id?: string;
|
|
406
|
+
validation_evidence_ref?: string;
|
|
403
407
|
}
|
|
404
408
|
|
|
405
409
|
export interface EvolutionEvidenceValidation {
|
|
@@ -413,6 +417,10 @@ export interface EvolutionEvidenceValidation {
|
|
|
413
417
|
gates_passed?: number;
|
|
414
418
|
gates_total?: number;
|
|
415
419
|
gate_results?: Array<{ gate: ValidationGate; passed: boolean; reason: string }>;
|
|
420
|
+
validation_mode?: ValidationMode;
|
|
421
|
+
validation_agent?: string;
|
|
422
|
+
validation_fixture_id?: string;
|
|
423
|
+
validation_evidence_ref?: string;
|
|
416
424
|
}
|
|
417
425
|
|
|
418
426
|
export interface EvolutionEvidenceEntry {
|
|
@@ -697,6 +705,25 @@ export interface BodyEvolutionProposal {
|
|
|
697
705
|
/** Closed union of gate names used in the validation pipeline. */
|
|
698
706
|
export type ValidationGate = "structural" | "trigger_accuracy" | "quality";
|
|
699
707
|
|
|
708
|
+
/**
 * Closed union of validation mode identifiers, carried in the optional
 * `validation_mode` field of audit entries, evidence validations and body
 * validation results.
 */
export type ValidationMode =
  | "structural_guard"
  | "host_replay"
  | "llm_judge";
|
|
709
|
+
|
|
710
|
+
/**
 * Descriptor of a routing replay fixture.
 *
 * NOTE(review): field meanings are inferred from their names only — confirm
 * against the host-replay validator before relying on them.
 */
export interface RoutingReplayFixture {
  fixture_id: string;
  /** Restricted to the two platform literals below. */
  platform: "claude_code" | "codex";
  target_skill_name: string;
  target_skill_path: string;
  /** May be empty. */
  competing_skill_paths: string[];
  /** Optional; omitted when no workspace root is supplied. */
  workspace_root?: string;
}
|
|
718
|
+
|
|
719
|
+
/**
 * Outcome of replaying a single query.
 *
 * NOTE(review): `passed` presumably means `triggered === should_trigger`;
 * this file does not enforce that — confirm where results are produced.
 */
export interface RoutingReplayEntryResult {
  query: string;
  should_trigger: boolean;
  triggered: boolean;
  passed: boolean;
  /** Optional free-form evidence string. */
  evidence?: string;
}
|
|
726
|
+
|
|
700
727
|
/** Result of validating a body evolution proposal. */
|
|
701
728
|
export interface BodyValidationResult {
|
|
702
729
|
proposal_id: string;
|
|
@@ -705,6 +732,12 @@ export interface BodyValidationResult {
|
|
|
705
732
|
gate_results: Array<{ gate: ValidationGate; passed: boolean; reason: string }>;
|
|
706
733
|
improved: boolean;
|
|
707
734
|
regressions: string[];
|
|
735
|
+
validation_mode?: ValidationMode;
|
|
736
|
+
validation_agent?: string;
|
|
737
|
+
validation_fixture_id?: string;
|
|
738
|
+
before_pass_rate?: number;
|
|
739
|
+
after_pass_rate?: number;
|
|
740
|
+
per_entry_results?: RoutingReplayEntryResult[];
|
|
708
741
|
}
|
|
709
742
|
|
|
710
743
|
/** Configuration for which LLM model a role should use. */
|
|
@@ -854,6 +887,76 @@ export interface ComposabilityReportV2 extends ComposabilityReport {
|
|
|
854
887
|
synergy_count: number;
|
|
855
888
|
}
|
|
856
889
|
|
|
890
|
+
// ---------------------------------------------------------------------------
|
|
891
|
+
// Skill family overlap / consolidation types
|
|
892
|
+
// ---------------------------------------------------------------------------
|
|
893
|
+
|
|
894
|
+
/** One skill included in a family overlap report. */
export interface SkillFamilyOverlapMember {
  skill_name: string;
  /** Optional; omitted when no path is known for the skill. */
  skill_path?: string;
  /** NOTE(review): presumably the number of positive queries for this skill — confirm. */
  positive_query_count: number;
}
|
|
899
|
+
|
|
900
|
+
/**
 * Overlap measured between two skills.
 *
 * NOTE(review): the scale of `overlap_pct` (0–1 vs 0–100) is not visible in
 * this file — confirm against the producer.
 */
export interface SkillFamilyOverlapPair {
  skill_a: string;
  skill_b: string;
  overlap_pct: number;
  shared_query_count: number;
  shared_queries: string[];
  /** Three-level rating; one of the literals below. */
  consolidation_pressure: "low" | "medium" | "high";
}
|
|
908
|
+
|
|
909
|
+
/**
 * Comparison of two skills' static content.
 *
 * NOTE(review): the similarity fields are presumably scores in [0, 1];
 * neither the range nor the metric is visible in this file — confirm.
 */
export interface SkillFamilyColdStartPair {
  skill_a: string;
  skill_b: string;
  description_similarity: number;
  when_to_use_similarity: number;
  shared_command_surfaces: string[];
  shared_terms: string[];
  synthetic_confusion_queries: string[];
  /** Three-level rating; one of the literals below. */
  suspicion_level: "low" | "medium" | "high";
}
|
|
919
|
+
|
|
920
|
+
/**
 * Aggregate of the cold-start pair comparisons for a skill family.
 *
 * NOTE(review): how `candidate` and `average_static_similarity` are derived
 * from `pairs` is not visible in this file — confirm against the producer.
 */
export interface SkillFamilyColdStartSuspicion {
  candidate: boolean;
  analyzed_pairs: number;
  suspicious_pair_count: number;
  average_static_similarity: number;
  pairs: SkillFamilyColdStartPair[];
  /** Human-readable explanation lines. */
  rationale: string[];
}
|
|
928
|
+
|
|
929
|
+
/** A workflow entry in a family refactor proposal, tied to the skill it comes from. */
export interface SkillFamilyRefactorWorkflow {
  workflow_name: string;
  source_skill: string;
  /** NOTE(review): presumably a proposed file location — confirm what it is relative to. */
  suggested_path: string;
}
|
|
934
|
+
|
|
935
|
+
/**
 * Proposal for restructuring a skill family under a parent skill.
 *
 * NOTE(review): the intended consolidation process is inferred from field
 * names only — confirm with the family-overlap analysis.
 */
export interface SkillFamilyRefactorProposal {
  parent_skill_name: string;
  /** Optional family prefix. */
  family_prefix?: string;
  internal_workflows: SkillFamilyRefactorWorkflow[];
  /** Pairs an existing skill name with the workflow it should point to. */
  compatibility_aliases: Array<{ skill_name: string; target_workflow: string }>;
  migration_notes: string[];
}
|
|
942
|
+
|
|
943
|
+
/**
 * Top-level report for a skill family overlap analysis.
 *
 * NOTE(review): units and derivation of the numeric aggregates
 * (`overlap_density`, `average_overlap_pct`) are not visible in this file —
 * confirm against the producer.
 */
export interface SkillFamilyOverlapReport {
  /** Optional family prefix. */
  family_prefix?: string;
  analyzed_skills: string[];
  members: SkillFamilyOverlapMember[];
  pairs: SkillFamilyOverlapPair[];
  /** Optional cold-start analysis. */
  cold_start_suspicion?: SkillFamilyColdStartSuspicion;
  total_pairs_analyzed: number;
  overlap_count: number;
  overlap_density: number;
  average_overlap_pct: number;
  consolidation_candidate: boolean;
  recommendation: string;
  /** Human-readable explanation lines. */
  rationale: string[];
  /** Optional refactor proposal. */
  refactor_proposal?: SkillFamilyRefactorProposal;
  /** NOTE(review): presumably a timestamp string — format not shown here. */
  generated_at: string;
}
|
|
959
|
+
|
|
857
960
|
// ---------------------------------------------------------------------------
|
|
858
961
|
// Workflow Support types
|
|
859
962
|
// ---------------------------------------------------------------------------
|
|
package/cli/selftune/utils/skill-detection.ts
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
 * Canonical form of a skill name for comparisons: surrounding whitespace
 * removed, then lower-cased.
 *
 * @param value - Raw skill name.
 * @returns The trimmed, lower-cased name.
 */
export function normalizeSkillName(value: string): string {
  const withoutPadding = value.trim();
  return withoutPadding.toLowerCase();
}
|
|
4
|
+
|
|
5
|
+
/**
 * Extract the skill that an internal prompt is about.
 *
 * The text qualifies as an internal prompt only if it contains one of three
 * marker sentences. The skill name is then taken from the first of three
 * patterns that yields a non-empty capture: a "Skill Name:" line, or one of
 * two quoted-name phrasings.
 *
 * @param text - Prompt text to inspect.
 * @param knownSkillNames - Known skill names; iterated at most once, and
 *   only after a name has been captured.
 * @returns The matching known name (compared via `normalizeSkillName`, with
 *   the known name's own spelling), otherwise the trimmed captured name;
 *   `null` when the text is empty, is not an internal prompt, or no pattern
 *   matches.
 */
export function getInternalPromptTargetSkill(
  text: string,
  knownSkillNames: Iterable<string>,
): string | null {
  if (!text) return null;

  const promptMarkers = [
    "You are a skill description optimizer",
    "You are an evaluation assistant",
    "Given this skill description",
  ];
  if (!promptMarkers.some((marker) => text.includes(marker))) return null;

  const namePatterns = [
    /Skill Name:\s*([^\n]+)/i,
    /Propose an improved description for the "([^"]+)" skill/i,
    /would each query trigger the "([^"]+)" skill/i,
  ];

  // The first pattern producing a non-empty capture wins; later ones are not tried.
  let captured: string | undefined;
  for (const pattern of namePatterns) {
    captured = pattern.exec(text)?.[1]?.trim();
    if (captured) break;
  }
  if (!captured) return null;

  const wanted = normalizeSkillName(captured);
  for (const known of knownSkillNames) {
    if (normalizeSkillName(known) === wanted) return known;
  }
  return captured;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Report whether a text part begins, after leading whitespace, with one of
 * the three wrapper prefixes listed below.
 *
 * @param text - Text part to inspect.
 * @returns `true` when the text starts with one of the prefixes.
 */
export function isWrappedNonUserPart(text: string): boolean {
  const wrapperPrefixes = [
    "# AGENTS.md instructions for ",
    "<environment_context>",
    "<INSTRUCTIONS>",
  ];
  const body = text.trimStart();
  return wrapperPrefixes.some((prefix) => body.startsWith(prefix));
}
|
|
package/cli/selftune/utils/text-similarity.ts
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
 * Default stopwords ignored by text tokenization. Kept as one
 * space-separated list so the vocabulary can be read at a glance; insertion
 * order matches the list.
 */
const BASE_TEXT_SIMILARITY_STOPWORDS = new Set(
  "a an and are as at be for from how in into is it of on or that the this to use user when with".split(
    " ",
  ),
);
|
|
28
|
+
|
|
29
|
+
/**
 * Create a new stopword set holding the base stopwords plus any extras.
 * The base set itself is never modified.
 *
 * @param additionalStopwords - Extra words to include; defaults to none.
 * @returns A fresh set with the base words first, then the extras.
 */
export function buildStopwordSet(additionalStopwords: string[] = []): Set<string> {
  const merged = new Set<string>(BASE_TEXT_SIMILARITY_STOPWORDS);
  for (const word of additionalStopwords) {
    merged.add(word);
  }
  return merged;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Split text into a set of lower-cased word tokens for similarity scoring.
 *
 * Tokens are maximal runs of letters and digits. Tokens shorter than three
 * characters and tokens found in `stopwords` are discarded.
 *
 * The split is Unicode-aware: the text is NFC-normalised and split on
 * anything that is not a Unicode letter or number, so words such as "café"
 * stay whole. ASCII-only input tokenizes exactly as it did with the former
 * `[^a-z0-9]` split.
 *
 * @param text - Text to tokenize.
 * @param stopwords - Words to drop; defaults to the base stopword set.
 * @returns The distinct surviving tokens.
 */
export function tokenizeText(
  text: string,
  stopwords = BASE_TEXT_SIMILARITY_STOPWORDS,
): Set<string> {
  const tokens = new Set<string>();
  // NFC first so a letter followed by a combining accent becomes one letter code point.
  for (const token of text.normalize("NFC").toLowerCase().split(/[^\p{L}\p{N}]+/u)) {
    if (token.length >= 3 && !stopwords.has(token)) tokens.add(token);
  }
  return tokens;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Jaccard similarity of two token sets: |intersection| / |union|.
 *
 * @param left - First token set.
 * @param right - Second token set.
 * @returns A value in [0, 1]; 0 when either set is empty.
 */
export function jaccardSimilarity(left: Set<string>, right: Set<string>): number {
  if (left.size === 0 || right.size === 0) return 0;

  const intersectionSize = Array.from(left).filter((token) => right.has(token)).length;
  // |A ∪ B| = |A| + |B| − |A ∩ B|
  const unionSize = left.size + right.size - intersectionSize;
  return unionSize > 0 ? intersectionSize / unionSize : 0;
}
|
|
55
|
+
|
|
56
|
+
/**
 * Collect the lines of a markdown body's "When to use" section.
 *
 * The section starts after the first heading of level two or deeper whose
 * text is exactly "when to use" (case-insensitive), and ends at the next
 * heading of level two or deeper, or at the end of the body. Blank lines are
 * skipped, a leading "- " or "* " bullet marker is removed, and every line
 * is trimmed.
 *
 * @param body - Markdown text to scan.
 * @returns The section's lines in order; `[]` when the heading is absent.
 */
export function extractWhenToUseLines(body: string): string[] {
  const rows = body.split("\n").map((row) => row.trim());

  const headingAt = rows.findIndex((row) => /^##+\s+when to use\s*$/i.test(row));
  if (headingAt < 0) return [];

  const collected: string[] = [];
  for (const row of rows.slice(headingAt + 1)) {
    if (row === "") continue;
    if (/^##+\s+/.test(row)) break;
    // Rows without a bullet marker pass through the replace unchanged.
    collected.push(row.replace(/^[-*]\s+/, "").trim());
  }
  return collected;
}
|
|
package/cli/selftune/watchlist.ts
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import {
|
|
2
|
+
existsSync,
|
|
3
|
+
mkdirSync,
|
|
4
|
+
readFileSync,
|
|
5
|
+
renameSync,
|
|
6
|
+
unlinkSync,
|
|
7
|
+
writeFileSync,
|
|
8
|
+
} from "node:fs";
|
|
9
|
+
|
|
10
|
+
import { SELFTUNE_CONFIG_DIR, WATCHED_SKILLS_PATH } from "./constants.js";
|
|
11
|
+
|
|
12
|
+
/** Schema version written to the watchlist file and required when reading it back. */
const CURRENT_WATCHLIST_VERSION = 1;

/** JSON shape of the persisted watchlist file. */
interface WatchlistPayload {
  /** Always the current schema version. */
  version: typeof CURRENT_WATCHLIST_VERSION;
  /** Watched skill names. */
  skills: string[];
}
|
|
18
|
+
|
|
19
|
+
/**
 * Clean a list of skill names: trim each entry, drop entries that are empty
 * after trimming, and remove duplicates while keeping first-seen order.
 *
 * @param skills - Raw skill names.
 * @returns The trimmed, de-duplicated names.
 */
function normalizeSkills(skills: string[]): string[] {
  const nonEmpty = skills.map((skill) => skill.trim()).filter((skill) => skill.length > 0);
  // A Set iterates in insertion order, so the first occurrence of each name wins.
  return Array.from(new Set(nonEmpty));
}
|
|
30
|
+
|
|
31
|
+
/**
 * Read the persisted watchlist.
 *
 * Never throws. It returns `[]` when the file is missing, cannot be read or
 * parsed, has a `version` other than the current one, or has a `skills`
 * field that is not an array. Non-string entries are dropped before the
 * names are normalised.
 *
 * @returns The normalised watched skill names, possibly empty.
 */
export function loadWatchedSkills(): string[] {
  try {
    if (!existsSync(WATCHED_SKILLS_PATH)) return [];

    const raw = readFileSync(WATCHED_SKILLS_PATH, "utf-8");
    const payload = JSON.parse(raw) as Partial<WatchlistPayload>;

    if (payload.version !== CURRENT_WATCHLIST_VERSION) return [];
    if (!Array.isArray(payload.skills)) return [];

    const names = payload.skills.filter((skill): skill is string => typeof skill === "string");
    return normalizeSkills(names);
  } catch {
    // An unreadable or malformed file is treated as "nothing watched".
    return [];
  }
}
|
|
44
|
+
|
|
45
|
+
/**
 * Persist the watchlist and return what was written.
 *
 * The names are normalised, serialised as indented JSON with the current
 * schema version, written to a temp file named after the process id and the
 * current time, and then renamed onto the watchlist path. If the write or
 * rename fails, the temp file is removed on a best-effort basis and the
 * original error is rethrown.
 *
 * @param skills - Skill names to store.
 * @returns The normalised names that were saved.
 * @throws Whatever the underlying filesystem call threw.
 */
export function saveWatchedSkills(skills: string[]): string[] {
  const normalized = normalizeSkills(skills);
  mkdirSync(SELFTUNE_CONFIG_DIR, { recursive: true });

  const tempPath = `${WATCHED_SKILLS_PATH}.${process.pid}.${Date.now()}.tmp`;

  const discardTempFile = (): void => {
    try {
      if (existsSync(tempPath)) unlinkSync(tempPath);
    } catch {
      // Best-effort cleanup for interrupted temp writes.
    }
  };

  try {
    const serialized = JSON.stringify(
      { version: CURRENT_WATCHLIST_VERSION, skills: normalized },
      null,
      2,
    );
    writeFileSync(tempPath, serialized, "utf-8");
    renameSync(tempPath, WATCHED_SKILLS_PATH);
  } catch (error) {
    discardTempFile();
    throw error;
  }

  return normalized;
}
|