selftune 0.2.16 → 0.2.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -22
- package/apps/local-dashboard/dist/assets/index-DnhnXQm6.js +60 -0
- package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-table-BIiI3YhS.js +1 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +12 -0
- package/apps/local-dashboard/dist/index.html +5 -5
- package/cli/selftune/alpha-upload/build-payloads.ts +14 -1
- package/cli/selftune/alpha-upload/client.ts +51 -1
- package/cli/selftune/alpha-upload/flush.ts +46 -5
- package/cli/selftune/alpha-upload/stage-canonical.ts +32 -10
- package/cli/selftune/alpha-upload-contract.ts +9 -0
- package/cli/selftune/constants.ts +92 -5
- package/cli/selftune/contribute/contribute.ts +30 -2
- package/cli/selftune/contribute/sanitize.ts +52 -5
- package/cli/selftune/contribution-config.ts +249 -0
- package/cli/selftune/contribution-relay.ts +177 -0
- package/cli/selftune/contribution-signals.ts +219 -0
- package/cli/selftune/contribution-staging.ts +147 -0
- package/cli/selftune/contributions.ts +532 -0
- package/cli/selftune/creator-contributions.ts +333 -0
- package/cli/selftune/dashboard-contract.ts +305 -1
- package/cli/selftune/dashboard-server.ts +47 -13
- package/cli/selftune/eval/family-overlap.ts +395 -0
- package/cli/selftune/eval/hooks-to-evals.ts +182 -28
- package/cli/selftune/eval/synthetic-evals.ts +298 -11
- package/cli/selftune/evolution/description-quality.ts +12 -11
- package/cli/selftune/evolution/evolve.ts +214 -51
- package/cli/selftune/evolution/validate-proposal.ts +9 -6
- package/cli/selftune/export.ts +2 -2
- package/cli/selftune/grading/grade-session.ts +20 -0
- package/cli/selftune/hooks/commit-track.ts +188 -0
- package/cli/selftune/hooks/prompt-log.ts +10 -1
- package/cli/selftune/hooks/session-stop.ts +2 -2
- package/cli/selftune/hooks/skill-eval.ts +15 -1
- package/cli/selftune/hooks/stdin-preview.ts +32 -0
- package/cli/selftune/index.ts +41 -5
- package/cli/selftune/ingestors/codex-rollout.ts +31 -35
- package/cli/selftune/ingestors/codex-wrapper.ts +32 -24
- package/cli/selftune/localdb/db.ts +2 -2
- package/cli/selftune/localdb/direct-write.ts +69 -6
- package/cli/selftune/localdb/queries.ts +1253 -37
- package/cli/selftune/localdb/schema.ts +66 -0
- package/cli/selftune/orchestrate.ts +32 -4
- package/cli/selftune/recover.ts +153 -0
- package/cli/selftune/repair/skill-usage.ts +363 -4
- package/cli/selftune/routes/actions.ts +35 -1
- package/cli/selftune/routes/analytics.ts +14 -0
- package/cli/selftune/routes/index.ts +1 -0
- package/cli/selftune/routes/overview.ts +150 -4
- package/cli/selftune/routes/skill-report.ts +648 -18
- package/cli/selftune/status.ts +81 -2
- package/cli/selftune/sync.ts +56 -2
- package/cli/selftune/trust-model.ts +66 -0
- package/cli/selftune/types.ts +80 -0
- package/cli/selftune/utils/skill-detection.ts +43 -0
- package/cli/selftune/utils/transcript.ts +210 -1
- package/cli/selftune/watchlist.ts +65 -0
- package/node_modules/@selftune/telemetry-contract/src/types.ts +11 -0
- package/package.json +1 -1
- package/packages/telemetry-contract/src/types.ts +11 -0
- package/packages/ui/src/components/ActivityTimeline.tsx +165 -150
- package/packages/ui/src/components/EvidenceViewer.tsx +335 -144
- package/packages/ui/src/components/EvolutionTimeline.tsx +58 -28
- package/packages/ui/src/components/OrchestrateRunsPanel.tsx +33 -16
- package/packages/ui/src/components/RecentActivityFeed.tsx +72 -41
- package/packages/ui/src/components/section-cards.tsx +12 -9
- package/packages/ui/src/primitives/card.tsx +1 -1
- package/skill/SKILL.md +40 -2
- package/skill/Workflows/AlphaUpload.md +4 -0
- package/skill/Workflows/Composability.md +64 -0
- package/skill/Workflows/Contribute.md +6 -3
- package/skill/Workflows/Contributions.md +97 -0
- package/skill/Workflows/CreatorContributions.md +74 -0
- package/skill/Workflows/Dashboard.md +31 -0
- package/skill/Workflows/Evals.md +57 -8
- package/skill/Workflows/Evolve.md +31 -13
- package/skill/Workflows/ExportCanonical.md +121 -0
- package/skill/Workflows/Hook.md +131 -0
- package/skill/Workflows/Ingest.md +7 -0
- package/skill/Workflows/Initialize.md +29 -9
- package/skill/Workflows/Orchestrate.md +27 -5
- package/skill/Workflows/Quickstart.md +94 -0
- package/skill/Workflows/Recover.md +84 -0
- package/skill/Workflows/RepairSkillUsage.md +95 -0
- package/skill/Workflows/Sync.md +18 -12
- package/skill/Workflows/Uninstall.md +82 -0
- package/skill/settings_snippet.json +11 -0
- package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +0 -2
- package/apps/local-dashboard/dist/assets/index-DOu3iLD9.js +0 -16
- package/apps/local-dashboard/dist/assets/vendor-table-pHbDxq36.js +0 -8
- package/apps/local-dashboard/dist/assets/vendor-ui-DIwlrGlb.js +0 -12
|
@@ -8,10 +8,12 @@
|
|
|
8
8
|
* GET /api/health — Dashboard server health probe
|
|
9
9
|
* GET /api/v2/doctor — System health diagnostics (config, logs, hooks, evolution)
|
|
10
10
|
* GET /api/v2/overview — SQLite-backed overview payload
|
|
11
|
+
* GET /api/v2/analytics — Performance analytics (trends, rankings, heatmap)
|
|
11
12
|
* GET /api/v2/skills/:name — SQLite-backed per-skill report
|
|
12
13
|
* POST /api/actions/watch — Trigger `selftune watch` for a skill
|
|
13
14
|
* POST /api/actions/evolve — Trigger `selftune evolve` for a skill
|
|
14
15
|
* POST /api/actions/rollback — Trigger `selftune rollback` for a skill
|
|
16
|
+
* POST /api/actions/watchlist — Persist creator watchlist preferences
|
|
15
17
|
* GET /badge/:name — Skill health badge
|
|
16
18
|
* GET /report/:name — Skill health report HTML
|
|
17
19
|
*/
|
|
@@ -29,7 +31,6 @@ import type {
|
|
|
29
31
|
} from "./dashboard-contract.js";
|
|
30
32
|
import { readEvidenceTrail } from "./evolution/evidence.js";
|
|
31
33
|
import { closeSingleton, DB_PATH, getDb } from "./localdb/db.js";
|
|
32
|
-
import { materializeIncremental } from "./localdb/materialize.js";
|
|
33
34
|
import {
|
|
34
35
|
queryEvolutionAudit,
|
|
35
36
|
queryQueryLog,
|
|
@@ -40,6 +41,7 @@ import { doctor } from "./observability.js";
|
|
|
40
41
|
import type { ActionRunner } from "./routes/index.js";
|
|
41
42
|
import {
|
|
42
43
|
handleAction,
|
|
44
|
+
handleAnalytics,
|
|
43
45
|
handleBadge,
|
|
44
46
|
handleDoctor,
|
|
45
47
|
handleOrchestrateRuns,
|
|
@@ -109,6 +111,16 @@ function decodePathSegment(segment: string): string | null {
|
|
|
109
111
|
}
|
|
110
112
|
}
|
|
111
113
|
|
|
114
|
+
/**
 * Build the set of `Origin` values treated as "same origin" for the dashboard.
 *
 * The set always contains `http://<hostname>:<port>`. When the hostname is
 * `localhost` or `127.0.0.1`, the other spelling is added as well so either
 * form of the loopback address is accepted.
 *
 * @param hostname - Host name the dashboard server is addressed by.
 * @param port - Port used to build each origin string.
 * @returns Set of accepted origin strings.
 */
function allowedDashboardOrigins(hostname: string, port: number): Set<string> {
  // A Map (not a plain object) so arbitrary hostnames never hit prototype keys.
  const loopbackTwin = new Map<string, string>([
    ["localhost", "127.0.0.1"],
    ["127.0.0.1", "localhost"],
  ]);

  const accepted = new Set<string>();
  accepted.add(`http://${hostname}:${port}`);

  const twin = loopbackTwin.get(hostname);
  if (twin !== undefined) {
    accepted.add(`http://${twin}:${port}`);
  }
  return accepted;
}
|
|
123
|
+
|
|
112
124
|
const MIME_TYPES: Record<string, string> = {
|
|
113
125
|
".html": "text/html; charset=utf-8",
|
|
114
126
|
".js": "application/javascript; charset=utf-8",
|
|
@@ -187,24 +199,21 @@ export async function startDashboardServer(
|
|
|
187
199
|
if (needsDb) {
|
|
188
200
|
try {
|
|
189
201
|
db = getDb();
|
|
190
|
-
// Materializer runs once at startup to backfill any JSONL data not yet in SQLite.
|
|
191
|
-
// After startup, hooks write directly to SQLite so re-materialization is unnecessary.
|
|
192
|
-
materializeIncremental(db);
|
|
193
202
|
} catch (error: unknown) {
|
|
194
203
|
const message = error instanceof Error ? error.message : String(error);
|
|
195
204
|
console.error(`V2 dashboard data unavailable: ${message}`);
|
|
196
205
|
}
|
|
197
206
|
}
|
|
198
207
|
|
|
199
|
-
// Hooks write directly to SQLite, so periodic
|
|
200
|
-
//
|
|
201
|
-
//
|
|
208
|
+
// Hooks and ingestors write directly to SQLite, so periodic materialization is
|
|
209
|
+
// not part of normal runtime. These remain no-ops because they are invoked
|
|
210
|
+
// from several shared request and watcher paths.
|
|
202
211
|
function refreshV2Data(): void {
|
|
203
|
-
// No-op:
|
|
212
|
+
// No-op: SQLite is already authoritative at runtime
|
|
204
213
|
}
|
|
205
214
|
|
|
206
215
|
function refreshV2DataImmediate(): void {
|
|
207
|
-
// No-op:
|
|
216
|
+
// No-op: SQLite is already authoritative at runtime
|
|
208
217
|
}
|
|
209
218
|
|
|
210
219
|
// -- SSE (Server-Sent Events) live update layer -----------------------------
|
|
@@ -259,6 +268,7 @@ export async function startDashboardServer(
|
|
|
259
268
|
let lastStatusCacheRefreshAt = 0;
|
|
260
269
|
let statusRefreshPromise: Promise<void> | null = null;
|
|
261
270
|
const STATUS_CACHE_TTL_MS = 30_000;
|
|
271
|
+
let boundPort = port;
|
|
262
272
|
|
|
263
273
|
async function refreshStatusCache(force = false): Promise<void> {
|
|
264
274
|
const cacheIsFresh =
|
|
@@ -383,8 +393,20 @@ export async function startDashboardServer(
|
|
|
383
393
|
});
|
|
384
394
|
}
|
|
385
395
|
|
|
386
|
-
// ---- POST /api/actions/{watch,evolve,rollback} ----
|
|
396
|
+
// ---- POST /api/actions/{watch,evolve,rollback,watchlist} ----
|
|
387
397
|
if (url.pathname.startsWith("/api/actions/") && req.method === "POST") {
|
|
398
|
+
const trustedActionOrigins = allowedDashboardOrigins(hostname, boundPort);
|
|
399
|
+
const origin = req.headers.get("origin");
|
|
400
|
+
if (!origin || !trustedActionOrigins.has(origin)) {
|
|
401
|
+
return Response.json(
|
|
402
|
+
{
|
|
403
|
+
success: false,
|
|
404
|
+
error:
|
|
405
|
+
"Dashboard actions only accept same-origin requests from the local dashboard UI.",
|
|
406
|
+
},
|
|
407
|
+
{ status: 403, headers: corsHeaders() },
|
|
408
|
+
);
|
|
409
|
+
}
|
|
388
410
|
const action = url.pathname.slice("/api/actions/".length);
|
|
389
411
|
let body: Record<string, unknown> = {};
|
|
390
412
|
try {
|
|
@@ -448,7 +470,7 @@ export async function startDashboardServer(
|
|
|
448
470
|
);
|
|
449
471
|
}
|
|
450
472
|
refreshV2Data();
|
|
451
|
-
return withCors(handleOverview(db, selftuneVersion));
|
|
473
|
+
return withCors(handleOverview(db, selftuneVersion, url.searchParams));
|
|
452
474
|
}
|
|
453
475
|
|
|
454
476
|
// ---- GET /api/v2/orchestrate-runs ----
|
|
@@ -469,6 +491,18 @@ export async function startDashboardServer(
|
|
|
469
491
|
return withCors(handleOrchestrateRuns(db, limit));
|
|
470
492
|
}
|
|
471
493
|
|
|
494
|
+
// ---- GET /api/v2/analytics ----
|
|
495
|
+
if (url.pathname === "/api/v2/analytics" && req.method === "GET") {
|
|
496
|
+
if (!db) {
|
|
497
|
+
return Response.json(
|
|
498
|
+
{ error: "V2 data unavailable" },
|
|
499
|
+
{ status: 503, headers: corsHeaders() },
|
|
500
|
+
);
|
|
501
|
+
}
|
|
502
|
+
refreshV2Data();
|
|
503
|
+
return withCors(handleAnalytics(db));
|
|
504
|
+
}
|
|
505
|
+
|
|
472
506
|
// ---- GET /api/v2/skills/:name ----
|
|
473
507
|
if (url.pathname.startsWith("/api/v2/skills/") && req.method === "GET") {
|
|
474
508
|
const skillName = decodePathSegment(url.pathname.slice("/api/v2/skills/".length));
|
|
@@ -495,7 +529,7 @@ export async function startDashboardServer(
|
|
|
495
529
|
);
|
|
496
530
|
}
|
|
497
531
|
refreshV2Data();
|
|
498
|
-
return withCors(handleSkillReport(db, skillName));
|
|
532
|
+
return withCors(handleSkillReport(db, skillName, url.searchParams));
|
|
499
533
|
}
|
|
500
534
|
|
|
501
535
|
// ---- SPA fallback ----
|
|
@@ -510,7 +544,7 @@ export async function startDashboardServer(
|
|
|
510
544
|
},
|
|
511
545
|
});
|
|
512
546
|
|
|
513
|
-
|
|
547
|
+
boundPort = server.port;
|
|
514
548
|
|
|
515
549
|
if (openBrowser) {
|
|
516
550
|
const url = `http://${hostname}:${boundPort}`;
|
|
@@ -0,0 +1,395 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
|
|
3
|
+
import { parseArgs } from "node:util";
|
|
4
|
+
|
|
5
|
+
import { getDb } from "../localdb/db.js";
|
|
6
|
+
import { queryQueryLog, querySkillUsageRecords } from "../localdb/queries.js";
|
|
7
|
+
import type {
|
|
8
|
+
QueryLogRecord,
|
|
9
|
+
SkillFamilyOverlapMember,
|
|
10
|
+
SkillFamilyOverlapPair,
|
|
11
|
+
SkillFamilyOverlapReport,
|
|
12
|
+
SkillFamilyRefactorProposal,
|
|
13
|
+
SkillUsageRecord,
|
|
14
|
+
} from "../types.js";
|
|
15
|
+
import { CLIError } from "../utils/cli-error.js";
|
|
16
|
+
import {
|
|
17
|
+
findInstalledSkillNames,
|
|
18
|
+
findInstalledSkillPath,
|
|
19
|
+
findRepositoryClaudeSkillDirs,
|
|
20
|
+
findRepositorySkillDirs,
|
|
21
|
+
} from "../utils/skill-discovery.js";
|
|
22
|
+
import { buildEvalSet } from "./hooks-to-evals.js";
|
|
23
|
+
|
|
24
|
+
const DEFAULT_MIN_OVERLAP = 0.3;
|
|
25
|
+
const DEFAULT_MIN_SHARED = 2;
|
|
26
|
+
const DEFAULT_MAX_SHARED = 10;
|
|
27
|
+
|
|
28
|
+
interface FamilyOverlapOptions {
|
|
29
|
+
familyPrefix?: string;
|
|
30
|
+
parentSkillName?: string;
|
|
31
|
+
minOverlapPct?: number;
|
|
32
|
+
minSharedQueries?: number;
|
|
33
|
+
maxSharedQueries?: number;
|
|
34
|
+
searchDirs?: string[];
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
/**
 * List the directories searched for skill definitions.
 *
 * Order: repository skill dirs for the current working directory, repository
 * Claude skill dirs, then `<HOME>/.agents/skills`, `<HOME>/.claude/skills`, and
 * `<CODEX_HOME>/skills`. `CODEX_HOME` falls back to `<HOME>/.codex`; an unset
 * `HOME` is treated as the empty string.
 */
function getEvalSkillSearchDirs(): string[] {
  const workingDir = process.cwd();
  const home = process.env.HOME ?? "";
  const codexRoot = process.env.CODEX_HOME ?? `${home}/.codex`;

  const dirs: string[] = [];
  dirs.push(...findRepositorySkillDirs(workingDir));
  dirs.push(...findRepositoryClaudeSkillDirs(workingDir));
  dirs.push(`${home}/.agents/skills`, `${home}/.claude/skills`, `${codexRoot}/skills`);
  return dirs;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Canonicalize a query for exact-match comparison: trim both ends, collapse
 * every run of whitespace to a single space, and lowercase the result.
 */
function normalizeQuery(value: string): string {
  const trimmed = value.trim();
  const singleSpaced = trimmed.replace(/\s+/g, " ");
  return singleSpaced.toLowerCase();
}
|
|
53
|
+
|
|
54
|
+
/**
 * Infer a shared family prefix from a list of skill names.
 *
 * Each name contributes its text up to and including the first hyphen (or the
 * whole name when it has no hyphen). The prefix is returned only when every
 * name contributes the same value.
 *
 * @returns The shared prefix, or `undefined` when fewer than two skills are
 *   given or the names disagree.
 */
function inferFamilyPrefix(skills: string[]): string | undefined {
  if (skills.length < 2) return undefined;

  const leadingSegment = (name: string): string => {
    const dash = name.indexOf("-");
    return dash === -1 ? name : name.slice(0, dash + 1);
  };

  const segments = skills.map(leadingSegment);
  const shared = segments[0];
  for (const segment of segments) {
    if (segment !== shared) return undefined;
  }
  return shared;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Choose the name of the parent skill for a family.
 *
 * Precedence: a non-blank `explicitParent` (trimmed), then `familyPrefix`, then
 * the prefix inferred from `skills`, then the literal `"family"`. A single
 * trailing hyphen is removed from a prefix-derived name.
 */
function inferParentSkillName(
  skills: string[],
  explicitParent?: string,
  familyPrefix?: string,
): string {
  const trimmedParent = explicitParent?.trim();
  if (trimmedParent) return trimmedParent;

  const base = familyPrefix ?? inferFamilyPrefix(skills) ?? "family";
  if (base.endsWith("-")) {
    return base.slice(0, -1);
  }
  return base;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Derive a workflow name from a skill name by removing the family prefix when
 * the skill name starts with it. The result is trimmed; an empty result
 * becomes `"default"`.
 */
function toWorkflowName(skillName: string, familyPrefix?: string): string {
  let remainder = skillName;
  if (familyPrefix && skillName.startsWith(familyPrefix)) {
    remainder = skillName.slice(familyPrefix.length);
  }

  const cleaned = remainder.trim();
  return cleaned === "" ? "default" : cleaned;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Collect the distinct normalized queries that should trigger `skillName`.
 *
 * Entries come from `buildEvalSet`; only those with a truthy `should_trigger`
 * are kept, each query is normalized, and empty results are dropped.
 */
function buildPositiveQuerySet(
  skillName: string,
  skillRecords: SkillUsageRecord[],
  queryRecords: QueryLogRecord[],
): Set<string> {
  // NOTE(review): the trailing positional arguments are forwarded verbatim;
  // their meaning is defined by buildEvalSet (presumably a size cap, flags, and
  // a seed) — confirm against hooks-to-evals.
  const entries = buildEvalSet(
    skillRecords,
    queryRecords,
    skillName,
    Number.MAX_SAFE_INTEGER,
    false,
    42,
    false,
  );

  const positives = new Set<string>();
  for (const entry of entries) {
    if (!entry.should_trigger) continue;
    const normalized = normalizeQuery(entry.query);
    if (normalized) positives.add(normalized);
  }
  return positives;
}
|
|
103
|
+
|
|
104
|
+
/**
 * Describe one family member for the report: its name, the path returned by
 * `findInstalledSkillPath` for the given search directories, and how many
 * positive queries it has.
 */
function buildMember(
  skillName: string,
  positiveQueries: Set<string>,
  searchDirs: string[],
): SkillFamilyOverlapMember {
  const skillPath = findInstalledSkillPath(skillName, searchDirs);
  const member: SkillFamilyOverlapMember = {
    skill_name: skillName,
    skill_path: skillPath,
    positive_query_count: positiveQueries.size,
  };
  return member;
}
|
|
115
|
+
|
|
116
|
+
/**
 * Bucket an overlap ratio: `>= 0.6` is "high", `>= 0.4` is "medium", anything
 * else (including NaN) is "low".
 */
function scoreConsolidationPressure(overlapPct: number): "low" | "medium" | "high" {
  return overlapPct >= 0.6 ? "high" : overlapPct >= 0.4 ? "medium" : "low";
}
|
|
121
|
+
|
|
122
|
+
/**
 * Build the consolidation proposal for a skill family.
 *
 * Every skill becomes one internal workflow (named via `toWorkflowName`, with a
 * suggested path of `Workflows/<name>.md`) and one compatibility alias that
 * points the old skill name at that workflow. A fixed list of migration notes
 * is attached.
 */
function buildRefactorProposal(
  skills: string[],
  familyPrefix: string | undefined,
  parentSkillName: string,
): SkillFamilyRefactorProposal {
  const internalWorkflows: {
    workflow_name: string;
    source_skill: string;
    suggested_path: string;
  }[] = [];
  const aliases: { skill_name: string; target_workflow: string }[] = [];

  for (const sourceSkill of skills) {
    const name = toWorkflowName(sourceSkill, familyPrefix);
    internalWorkflows.push({
      workflow_name: name,
      source_skill: sourceSkill,
      suggested_path: `Workflows/${name}.md`,
    });
    aliases.push({ skill_name: sourceSkill, target_workflow: name });
  }

  const migrationNotes = [
    `Create a parent skill \`${parentSkillName}\` whose SKILL.md routes into internal workflows instead of exposing each family member as a primary top-level trigger surface.`,
    "Keep the existing sibling skills as thin compatibility aliases for at least one release cycle while usage shifts to the parent skill.",
    "Move execution-specific instructions into internal Workflows/ or references/ files so the parent SKILL.md stays focused on routing and progressive disclosure.",
    "Use the compatibility aliases to measure whether trigger quality improves before removing the old skill entry points.",
  ];

  return {
    parent_skill_name: parentSkillName,
    family_prefix: familyPrefix,
    internal_workflows: internalWorkflows,
    compatibility_aliases: aliases,
    migration_notes: migrationNotes,
  };
}
|
|
152
|
+
|
|
153
|
+
/**
 * Measure how much a set of sibling skills compete for the same queries and
 * decide whether the family looks like a consolidation candidate.
 *
 * For every skill the set of normalized positive queries is built, then every
 * unordered pair of skills is compared by exact query match.
 *
 * @param skills - Skill names to compare; at least two are required.
 * @param skillRecords - Skill usage records forwarded to the eval-set builder.
 * @param queryRecords - Query log records forwarded to the eval-set builder.
 * @param options - Optional overrides for prefix, parent name, thresholds, the
 *   number of shared queries listed per pair, and the skill search directories.
 * @returns The overlap report, including a refactor proposal when the family
 *   is a consolidation candidate.
 * @throws CLIError when fewer than two skills are supplied.
 */
export function analyzeSkillFamilyOverlap(
  skills: string[],
  skillRecords: SkillUsageRecord[],
  queryRecords: QueryLogRecord[],
  options: FamilyOverlapOptions = {},
): SkillFamilyOverlapReport {
  // NOTE(review): `skills` is not de-duplicated here (the CLI path does it in
  // resolveFamilySkills); a repeated name would be paired with itself.
  if (skills.length < 2) {
    throw new CLIError(
      "Skill family overlap analysis requires at least 2 skills.",
      "INVALID_FLAG",
      "selftune eval family-overlap --skills skill-a,skill-b",
    );
  }

  // Resolve options, falling back to inferred values and module defaults.
  const searchDirs = options.searchDirs ?? getEvalSkillSearchDirs();
  const familyPrefix = options.familyPrefix ?? inferFamilyPrefix(skills);
  const minOverlapPct = options.minOverlapPct ?? DEFAULT_MIN_OVERLAP;
  const minSharedQueries = options.minSharedQueries ?? DEFAULT_MIN_SHARED;
  const maxSharedQueries = options.maxSharedQueries ?? DEFAULT_MAX_SHARED;

  // One positive-query set and one member entry per skill, in input order.
  const positiveQueriesBySkill = new Map<string, Set<string>>();
  const members: SkillFamilyOverlapMember[] = [];
  for (const skillName of skills) {
    const positives = buildPositiveQuerySet(skillName, skillRecords, queryRecords);
    positiveQueriesBySkill.set(skillName, positives);
    members.push(buildMember(skillName, positives, searchDirs));
  }

  // Compare every unordered pair (i < j).
  const pairs: SkillFamilyOverlapPair[] = [];
  for (let i = 0; i < skills.length; i++) {
    for (let j = i + 1; j < skills.length; j++) {
      const skillA = skills[i];
      const skillB = skills[j];
      const positivesA = positiveQueriesBySkill.get(skillA) ?? new Set<string>();
      const positivesB = positiveQueriesBySkill.get(skillB) ?? new Set<string>();
      // A skill with no positives cannot overlap; this also keeps the divisor below non-zero.
      if (positivesA.size === 0 || positivesB.size === 0) continue;

      const sharedQueries = [...positivesA].filter((query) => positivesB.has(query));
      // Ratio is relative to the SMALLER set, so a small skill fully contained
      // in a larger one scores 1.
      const overlapPct = sharedQueries.length / Math.min(positivesA.size, positivesB.size);
      // Both thresholds must be met for the pair to be reported.
      if (sharedQueries.length < minSharedQueries || overlapPct < minOverlapPct) continue;

      pairs.push({
        skill_a: skillA,
        skill_b: skillB,
        overlap_pct: overlapPct,
        shared_query_count: sharedQueries.length,
        // Only the listed examples are capped; shared_query_count stays exact.
        shared_queries: sharedQueries.slice(0, maxSharedQueries),
        consolidation_pressure: scoreConsolidationPressure(overlapPct),
      });
    }
  }

  // Highest overlap first; ties broken by the larger shared-query count.
  pairs.sort(
    (a, b) => b.overlap_pct - a.overlap_pct || b.shared_query_count - a.shared_query_count,
  );

  // n-choose-2: counts every pair, including those skipped for having no positives.
  const totalPairsAnalyzed = (skills.length * (skills.length - 1)) / 2;
  const overlapCount = pairs.length;
  const overlapDensity = totalPairsAnalyzed > 0 ? overlapCount / totalPairsAnalyzed : 0;
  const averageOverlapPct =
    overlapCount > 0 ? pairs.reduce((sum, pair) => sum + pair.overlap_pct, 0) / overlapCount : 0;
  // A skill is "ready" when it has at least as many positives as the shared-query threshold.
  const readySkillCount = members.filter(
    (member) => member.positive_query_count >= minSharedQueries,
  ).length;
  // Candidate requires: >= 2 ready skills, >= 3 skills overall, and either two
  // overlapping pairs or one pair with at least half of all pairs overlapping.
  const consolidationCandidate =
    readySkillCount >= 2 &&
    skills.length >= 3 &&
    (overlapCount >= 2 || (overlapCount >= 1 && overlapDensity >= 0.5));

  const parentSkillName = inferParentSkillName(skills, options.parentSkillName, familyPrefix);
  // Rationale lines are appended in a fixed order: summary, overlap statement,
  // then optional high-pressure, low-data, and consolidation notes.
  const rationale = [
    `${skills.length} sibling skills analyzed with ${totalPairsAnalyzed} pairwise boundary checks.`,
    overlapCount === 0
      ? "No exact-query overlap crossed the current consolidation threshold."
      : `${overlapCount} skill pairs share at least ${Math.round(minOverlapPct * 100)}% of their trusted positive queries.`,
  ];

  if (pairs.some((pair) => pair.consolidation_pressure === "high")) {
    rationale.push(
      "High-overlap pairs suggest the current top-level routing surfaces are competing for the same real user intent.",
    );
  }

  if (readySkillCount < 2) {
    rationale.push(
      `Only ${readySkillCount} sibling skills currently have enough trusted positives to make a packaging call. Generate cold-start evals and gather real usage before treating this as evidence against consolidation.`,
    );
  }

  if (consolidationCandidate) {
    rationale.push(
      "This family looks like a packaging problem, not just a wording problem. Test a parent skill with internal workflows before continuing standalone description optimization.",
    );
  }

  return {
    family_prefix: familyPrefix,
    analyzed_skills: skills,
    members,
    pairs,
    total_pairs_analyzed: totalPairsAnalyzed,
    overlap_count: overlapCount,
    overlap_density: overlapDensity,
    average_overlap_pct: averageOverlapPct,
    consolidation_candidate: consolidationCandidate,
    // Insufficient data takes precedence over the consolidate / keep-separate verdict.
    recommendation:
      readySkillCount < 2
        ? "Insufficient trusted telemetry to make a family-packaging call yet. Use cold-start evals plus a few days of real usage before deciding whether to consolidate."
        : consolidationCandidate
          ? `Consider consolidating this family under a parent skill like \`${parentSkillName}\`.`
          : "Keep the skills separate for now and continue improving boundaries at the description/workflow level.",
    rationale,
    // The proposal is only produced for consolidation candidates.
    refactor_proposal: consolidationCandidate
      ? buildRefactorProposal(skills, familyPrefix, parentSkillName)
      : undefined,
    generated_at: new Date().toISOString(),
  };
}
|
|
271
|
+
|
|
272
|
+
/**
 * Split a comma-separated flag value into trimmed, non-empty skill names.
 * A missing or empty value yields an empty list.
 */
function parseSkillList(raw: string | undefined): string[] {
  const names: string[] = [];
  if (!raw) return names;

  for (const piece of raw.split(",")) {
    const name = piece.trim();
    if (name) names.push(name);
  }
  return names;
}
|
|
279
|
+
|
|
280
|
+
/**
 * Decide which skills to analyze.
 *
 * Explicit skill names win: they are de-duplicated and sorted. Otherwise a
 * family prefix is required, and the result is every installed skill name or
 * skill name seen in usage records that starts with that prefix, sorted.
 *
 * @throws CLIError when neither explicit skills nor a prefix is supplied.
 */
function resolveFamilySkills(
  explicitSkills: string[],
  familyPrefix: string | undefined,
  skillRecords: SkillUsageRecord[],
  searchDirs: string[],
): string[] {
  const byName = (left: string, right: string): number => left.localeCompare(right);

  if (explicitSkills.length > 0) {
    return Array.from(new Set(explicitSkills)).sort(byName);
  }

  if (!familyPrefix) {
    throw new CLIError(
      "Pass either --skills <a,b,c> or --prefix <family->.",
      "MISSING_FLAG",
      "selftune eval family-overlap --prefix sc-",
    );
  }
  const prefix: string = familyPrefix;

  const installed = findInstalledSkillNames(searchDirs);
  const observed = new Set<string>(
    skillRecords.map((record) => record.skill_name).filter(Boolean),
  );

  // Installed names first, then observed ones; the Set removes repeats.
  const matches = new Set<string>();
  for (const candidate of [...installed, ...observed]) {
    if (candidate.startsWith(prefix)) {
      matches.add(candidate);
    }
  }
  return Array.from(matches).sort(byName);
}
|
|
308
|
+
|
|
309
|
+
/**
 * Parse a numeric flag value strictly.
 *
 * Unlike `Number.parseFloat` / `Number.parseInt`, which stop at the first
 * character they cannot use, the whole (trimmed) string must be a decimal
 * number with an optional sign, fraction, and exponent.
 *
 * @returns The parsed number, or `NaN` when the text is not a plain decimal.
 */
function parseStrictNumberFlag(raw: string): number {
  const text = raw.trim();
  const DECIMAL = /^[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?$/;
  return DECIMAL.test(text) ? Number(text) : Number.NaN;
}

/**
 * CLI entry point for `selftune eval family-overlap`.
 *
 * Parses flags, validates the thresholds, loads skill usage and query log
 * records from the local database, resolves the skills to analyze, and prints
 * the overlap report as pretty-printed JSON on stdout.
 *
 * @throws CLIError for unknown or malformed flags, for an out-of-range
 *   `--min-overlap` (must be > 0 and <= 1), for a `--min-shared` that is not a
 *   positive integer, and when fewer than two skills are resolved.
 */
export async function cliMain(): Promise<void> {
  let values: ReturnType<typeof parseArgs>["values"];
  try {
    ({ values } = parseArgs({
      options: {
        help: { type: "boolean", short: "h", default: false },
        prefix: { type: "string" },
        skills: { type: "string" },
        "parent-skill": { type: "string" },
        "min-overlap": { type: "string" },
        "min-shared": { type: "string" },
      },
      strict: true,
    }));
  } catch (error: unknown) {
    // Surface parser failures (e.g. unknown flags) as a CLIError with a hint.
    const message = error instanceof Error ? error.message : String(error);
    throw new CLIError(
      `Invalid arguments: ${message}`,
      "INVALID_FLAG",
      "selftune eval family-overlap --help",
    );
  }

  if (values.help) {
    console.log(`Usage:
selftune eval family-overlap --skills skill-a,skill-b[,skill-c]
selftune eval family-overlap --prefix sc-

Options:
--skills <a,b,c> Explicit skill names
--prefix <family-> Analyze installed or observed skills with this prefix
--parent-skill <name> Override the inferred parent skill name
--min-overlap <0-1> Minimum overlap percentage (default: 0.3)
--min-shared <n> Minimum shared queries (default: 2)
-h, --help Show this help
`);
    return;
  }

  const rawMinOverlap = values["min-overlap"] as string | undefined;
  const rawMinShared = values["min-shared"] as string | undefined;
  // Strict parsing so inputs such as "0.3abc" or "3x" are rejected instead of
  // being silently accepted with their trailing characters ignored.
  const minOverlapPct =
    rawMinOverlap === undefined ? DEFAULT_MIN_OVERLAP : parseStrictNumberFlag(rawMinOverlap);
  const minSharedQueries =
    rawMinShared === undefined ? DEFAULT_MIN_SHARED : parseStrictNumberFlag(rawMinShared);

  if (!Number.isFinite(minOverlapPct) || minOverlapPct <= 0 || minOverlapPct > 1) {
    throw new CLIError(
      "Invalid --min-overlap value. Use a number between 0 and 1.",
      "INVALID_FLAG",
      "selftune eval family-overlap --prefix sc- --min-overlap 0.3",
    );
  }

  // isInteger also rejects NaN and Infinity, and refuses fractions such as
  // "2.9" rather than truncating them.
  if (!Number.isInteger(minSharedQueries) || minSharedQueries < 1) {
    throw new CLIError(
      "Invalid --min-shared value. Use a positive integer.",
      "INVALID_FLAG",
      "selftune eval family-overlap --prefix sc- --min-shared 2",
    );
  }

  // Flags are validated before the database is touched.
  const searchDirs = getEvalSkillSearchDirs();
  const db = getDb();
  const skillRecords = querySkillUsageRecords(db) as SkillUsageRecord[];
  const queryRecords = queryQueryLog(db) as QueryLogRecord[];
  // Blank flag values are treated the same as omitted ones.
  const familyPrefix = (values.prefix as string | undefined)?.trim() || undefined;
  const explicitSkills = parseSkillList(values.skills as string | undefined);
  const skills = resolveFamilySkills(explicitSkills, familyPrefix, skillRecords, searchDirs);

  if (skills.length < 2) {
    throw new CLIError(
      `Need at least 2 skills to analyze, found ${skills.length}.`,
      "INVALID_FLAG",
      "selftune eval family-overlap --prefix sc-",
    );
  }

  const report = analyzeSkillFamilyOverlap(skills, skillRecords, queryRecords, {
    familyPrefix,
    parentSkillName: (values["parent-skill"] as string | undefined)?.trim() || undefined,
    minOverlapPct,
    minSharedQueries,
    searchDirs,
  });
  console.log(JSON.stringify(report, null, 2));
}
|