selftune 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/diagnosis-analyst.md +20 -10
- package/.claude/agents/evolution-reviewer.md +14 -1
- package/.claude/agents/integration-guide.md +18 -6
- package/.claude/agents/pattern-analyst.md +18 -5
- package/CHANGELOG.md +12 -4
- package/README.md +43 -35
- package/apps/local-dashboard/dist/assets/geist-cyrillic-wght-normal-CHSlOQsW.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-ext-wght-normal-DMtmJ5ZE.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/geist-latin-wght-normal-Dm3htQBi.woff2 +0 -0
- package/apps/local-dashboard/dist/assets/index-C4EOTFZ2.js +15 -0
- package/apps/local-dashboard/dist/assets/index-bl-Webyd.css +1 -0
- package/apps/local-dashboard/dist/assets/vendor-react-U7zYD9Rg.js +60 -0
- package/apps/local-dashboard/dist/assets/vendor-table-B7VF2Ipl.js +26 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-D7_zX_qy.js +346 -0
- package/apps/local-dashboard/dist/favicon.png +0 -0
- package/apps/local-dashboard/dist/index.html +17 -0
- package/apps/local-dashboard/dist/logo.png +0 -0
- package/apps/local-dashboard/dist/logo.svg +9 -0
- package/cli/selftune/badge/badge-data.ts +1 -1
- package/cli/selftune/badge/badge.ts +4 -8
- package/cli/selftune/canonical-export.ts +183 -0
- package/cli/selftune/constants.ts +28 -0
- package/cli/selftune/contribute/contribute.ts +1 -1
- package/cli/selftune/cron/setup.ts +17 -17
- package/cli/selftune/dashboard-contract.ts +202 -0
- package/cli/selftune/dashboard-server.ts +653 -186
- package/cli/selftune/dashboard.ts +41 -176
- package/cli/selftune/eval/baseline.ts +5 -4
- package/cli/selftune/eval/composability-v2.ts +273 -0
- package/cli/selftune/eval/hooks-to-evals.ts +34 -15
- package/cli/selftune/eval/unit-test-cli.ts +1 -1
- package/cli/selftune/evolution/evidence.ts +26 -0
- package/cli/selftune/evolution/evolve-body.ts +105 -11
- package/cli/selftune/evolution/evolve.ts +371 -25
- package/cli/selftune/evolution/extract-patterns.ts +87 -29
- package/cli/selftune/evolution/rollback.ts +2 -2
- package/cli/selftune/grading/auto-grade.ts +200 -0
- package/cli/selftune/grading/grade-session.ts +448 -97
- package/cli/selftune/grading/results.ts +42 -0
- package/cli/selftune/hooks/prompt-log.ts +172 -2
- package/cli/selftune/hooks/session-stop.ts +123 -3
- package/cli/selftune/hooks/skill-eval.ts +119 -3
- package/cli/selftune/index.ts +395 -116
- package/cli/selftune/ingestors/claude-replay.ts +140 -114
- package/cli/selftune/ingestors/codex-rollout.ts +345 -46
- package/cli/selftune/ingestors/codex-wrapper.ts +207 -39
- package/cli/selftune/ingestors/openclaw-ingest.ts +141 -8
- package/cli/selftune/ingestors/opencode-ingest.ts +193 -17
- package/cli/selftune/init.ts +227 -14
- package/cli/selftune/last.ts +14 -5
- package/cli/selftune/localdb/db.ts +63 -0
- package/cli/selftune/localdb/materialize.ts +428 -0
- package/cli/selftune/localdb/queries.ts +376 -0
- package/cli/selftune/localdb/schema.ts +204 -0
- package/cli/selftune/monitoring/watch.ts +66 -15
- package/cli/selftune/normalization.ts +682 -0
- package/cli/selftune/observability.ts +19 -44
- package/cli/selftune/orchestrate.ts +1073 -0
- package/cli/selftune/quickstart.ts +203 -0
- package/cli/selftune/repair/skill-usage.ts +576 -0
- package/cli/selftune/schedule.ts +561 -0
- package/cli/selftune/status.ts +48 -26
- package/cli/selftune/sync.ts +627 -0
- package/cli/selftune/types.ts +148 -0
- package/cli/selftune/utils/canonical-log.ts +45 -0
- package/cli/selftune/utils/hooks.ts +41 -0
- package/cli/selftune/utils/html.ts +27 -0
- package/cli/selftune/utils/llm-call.ts +78 -20
- package/cli/selftune/utils/math.ts +10 -0
- package/cli/selftune/utils/query-filter.ts +139 -0
- package/cli/selftune/utils/skill-discovery.ts +340 -0
- package/cli/selftune/utils/skill-log.ts +68 -0
- package/cli/selftune/utils/skill-usage-confidence.ts +18 -0
- package/cli/selftune/utils/transcript.ts +272 -26
- package/cli/selftune/workflows/discover.ts +254 -0
- package/cli/selftune/workflows/skill-md-writer.ts +288 -0
- package/cli/selftune/workflows/workflows.ts +188 -0
- package/package.json +21 -8
- package/packages/telemetry-contract/README.md +11 -0
- package/packages/telemetry-contract/fixtures/golden.json +87 -0
- package/packages/telemetry-contract/fixtures/golden.test.ts +42 -0
- package/packages/telemetry-contract/index.ts +1 -0
- package/packages/telemetry-contract/package.json +19 -0
- package/packages/telemetry-contract/src/index.ts +2 -0
- package/packages/telemetry-contract/src/types.ts +163 -0
- package/packages/telemetry-contract/src/validators.ts +109 -0
- package/skill/SKILL.md +84 -53
- package/skill/Workflows/AutoActivation.md +17 -16
- package/skill/Workflows/Badge.md +6 -0
- package/skill/Workflows/Baseline.md +46 -23
- package/skill/Workflows/Composability.md +12 -5
- package/skill/Workflows/Contribute.md +17 -14
- package/skill/Workflows/Cron.md +56 -79
- package/skill/Workflows/Dashboard.md +45 -34
- package/skill/Workflows/Doctor.md +30 -17
- package/skill/Workflows/Evals.md +64 -40
- package/skill/Workflows/EvolutionMemory.md +2 -0
- package/skill/Workflows/Evolve.md +102 -47
- package/skill/Workflows/EvolveBody.md +6 -6
- package/skill/Workflows/Grade.md +36 -31
- package/skill/Workflows/ImportSkillsBench.md +11 -5
- package/skill/Workflows/Ingest.md +43 -36
- package/skill/Workflows/Initialize.md +44 -30
- package/skill/Workflows/Orchestrate.md +139 -0
- package/skill/Workflows/Replay.md +39 -18
- package/skill/Workflows/Rollback.md +3 -3
- package/skill/Workflows/Schedule.md +61 -0
- package/skill/Workflows/Sync.md +88 -0
- package/skill/Workflows/UnitTest.md +34 -22
- package/skill/Workflows/Watch.md +14 -4
- package/skill/Workflows/Workflows.md +129 -0
- package/skill/assets/activation-rules-default.json +26 -0
- package/skill/assets/multi-skill-settings.json +63 -0
- package/skill/assets/single-skill-settings.json +57 -0
- package/skill/references/invocation-taxonomy.md +2 -2
- package/skill/references/logs.md +164 -2
- package/skill/references/setup-patterns.md +65 -0
- package/skill/references/version-history.md +40 -0
- package/skill/settings_snippet.json +1 -1
- package/templates/multi-skill-settings.json +7 -7
- package/templates/single-skill-settings.json +6 -6
- package/dashboard/index.html +0 -1680
|
@@ -1,126 +1,12 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* selftune dashboard —
|
|
2
|
+
* selftune dashboard — Start the local React SPA dashboard server.
|
|
3
3
|
*
|
|
4
4
|
* Usage:
|
|
5
|
-
* selftune dashboard —
|
|
6
|
-
* selftune dashboard --
|
|
7
|
-
* selftune dashboard --
|
|
8
|
-
* selftune dashboard --serve — Start live dashboard server (default port 3141)
|
|
9
|
-
* selftune dashboard --serve --port 8080 — Start on custom port
|
|
5
|
+
* selftune dashboard — Start server on port 3141 and open browser
|
|
6
|
+
* selftune dashboard --port 8080 — Start on custom port
|
|
7
|
+
* selftune dashboard --serve — Deprecated alias for the default behavior
|
|
10
8
|
*/
|
|
11
9
|
|
|
12
|
-
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
13
|
-
import { homedir } from "node:os";
|
|
14
|
-
import { dirname, join, resolve } from "node:path";
|
|
15
|
-
import { EVOLUTION_AUDIT_LOG, QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "./constants.js";
|
|
16
|
-
import { getLastDeployedProposal, readAuditTrail } from "./evolution/audit.js";
|
|
17
|
-
import { computeMonitoringSnapshot } from "./monitoring/watch.js";
|
|
18
|
-
import type {
|
|
19
|
-
EvolutionAuditEntry,
|
|
20
|
-
QueryLogRecord,
|
|
21
|
-
SessionTelemetryRecord,
|
|
22
|
-
SkillUsageRecord,
|
|
23
|
-
} from "./types.js";
|
|
24
|
-
import { readJsonl } from "./utils/jsonl.js";
|
|
25
|
-
|
|
26
|
-
function findViewerHTML(): string {
|
|
27
|
-
// Try relative to this module first (works for both dev and installed)
|
|
28
|
-
const candidates = [
|
|
29
|
-
join(dirname(import.meta.dir), "..", "dashboard", "index.html"),
|
|
30
|
-
join(dirname(import.meta.dir), "dashboard", "index.html"),
|
|
31
|
-
resolve("dashboard", "index.html"),
|
|
32
|
-
];
|
|
33
|
-
for (const c of candidates) {
|
|
34
|
-
if (existsSync(c)) return c;
|
|
35
|
-
}
|
|
36
|
-
throw new Error("Could not find dashboard/index.html. Ensure it exists in the selftune repo.");
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
function buildEmbeddedHTML(): string {
|
|
40
|
-
const template = readFileSync(findViewerHTML(), "utf-8");
|
|
41
|
-
|
|
42
|
-
const telemetry = readJsonl<SessionTelemetryRecord>(TELEMETRY_LOG);
|
|
43
|
-
const skills = readJsonl<SkillUsageRecord>(SKILL_LOG);
|
|
44
|
-
const queries = readJsonl<QueryLogRecord>(QUERY_LOG);
|
|
45
|
-
const evolution = readJsonl<EvolutionAuditEntry>(EVOLUTION_AUDIT_LOG);
|
|
46
|
-
|
|
47
|
-
const totalRecords = telemetry.length + skills.length + queries.length + evolution.length;
|
|
48
|
-
|
|
49
|
-
if (totalRecords === 0) {
|
|
50
|
-
console.error("No log data found. Run some sessions first.");
|
|
51
|
-
console.error(` Checked: ${TELEMETRY_LOG}`);
|
|
52
|
-
console.error(` ${SKILL_LOG}`);
|
|
53
|
-
console.error(` ${QUERY_LOG}`);
|
|
54
|
-
console.error(` ${EVOLUTION_AUDIT_LOG}`);
|
|
55
|
-
process.exit(1);
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
// Compute per-skill monitoring snapshots
|
|
59
|
-
const skillNames = [...new Set(skills.map((r) => r.skill_name))];
|
|
60
|
-
const snapshots: Record<string, ReturnType<typeof computeMonitoringSnapshot>> = {};
|
|
61
|
-
for (const name of skillNames) {
|
|
62
|
-
const lastDeployed = getLastDeployedProposal(name);
|
|
63
|
-
const baselinePassRate = lastDeployed?.eval_snapshot?.pass_rate ?? 0.5;
|
|
64
|
-
snapshots[name] = computeMonitoringSnapshot(
|
|
65
|
-
name,
|
|
66
|
-
telemetry,
|
|
67
|
-
skills,
|
|
68
|
-
queries,
|
|
69
|
-
telemetry.length,
|
|
70
|
-
baselinePassRate,
|
|
71
|
-
);
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
// Compute unmatched queries
|
|
75
|
-
const triggeredQueries = new Set(
|
|
76
|
-
skills.filter((r) => r.triggered).map((r) => r.query.toLowerCase().trim()),
|
|
77
|
-
);
|
|
78
|
-
const unmatched = queries
|
|
79
|
-
.filter((q) => !triggeredQueries.has(q.query.toLowerCase().trim()))
|
|
80
|
-
.map((q) => ({
|
|
81
|
-
timestamp: q.timestamp,
|
|
82
|
-
session_id: q.session_id,
|
|
83
|
-
query: q.query,
|
|
84
|
-
}));
|
|
85
|
-
|
|
86
|
-
// Compute pending proposals
|
|
87
|
-
const auditTrail = readAuditTrail();
|
|
88
|
-
const proposalStatus: Record<string, string[]> = {};
|
|
89
|
-
for (const e of auditTrail) {
|
|
90
|
-
if (!proposalStatus[e.proposal_id]) proposalStatus[e.proposal_id] = [];
|
|
91
|
-
proposalStatus[e.proposal_id].push(e.action);
|
|
92
|
-
}
|
|
93
|
-
// Deduplicate by proposal_id: one entry per pending proposal
|
|
94
|
-
const terminalActions = new Set(["deployed", "rejected", "rolled_back"]);
|
|
95
|
-
const seenProposals = new Set<string>();
|
|
96
|
-
const pendingProposals = auditTrail.filter((e) => {
|
|
97
|
-
if (e.action !== "created" && e.action !== "validated") return false;
|
|
98
|
-
if (seenProposals.has(e.proposal_id)) return false;
|
|
99
|
-
const actions = proposalStatus[e.proposal_id] || [];
|
|
100
|
-
const isPending = !actions.some((a: string) => terminalActions.has(a));
|
|
101
|
-
if (isPending) seenProposals.add(e.proposal_id);
|
|
102
|
-
return isPending;
|
|
103
|
-
});
|
|
104
|
-
|
|
105
|
-
const data = {
|
|
106
|
-
telemetry,
|
|
107
|
-
skills,
|
|
108
|
-
queries,
|
|
109
|
-
evolution,
|
|
110
|
-
computed: {
|
|
111
|
-
snapshots,
|
|
112
|
-
unmatched,
|
|
113
|
-
pendingProposals,
|
|
114
|
-
},
|
|
115
|
-
};
|
|
116
|
-
|
|
117
|
-
// Inject embedded data right before </body>
|
|
118
|
-
// Escape </script> sequences to prevent XSS via embedded JSON
|
|
119
|
-
const safeJson = JSON.stringify(data).replace(/<\/script>/gi, "<\\/script>");
|
|
120
|
-
const dataScript = `<script id="embedded-data" type="application/json">${safeJson}</script>`;
|
|
121
|
-
return template.replace("</body>", `${dataScript}\n</body>`);
|
|
122
|
-
}
|
|
123
|
-
|
|
124
10
|
export async function cliMain(): Promise<void> {
|
|
125
11
|
const args = process.argv.slice(2);
|
|
126
12
|
|
|
@@ -128,71 +14,50 @@ export async function cliMain(): Promise<void> {
|
|
|
128
14
|
console.log(`selftune dashboard — Visual data dashboard
|
|
129
15
|
|
|
130
16
|
Usage:
|
|
131
|
-
selftune dashboard
|
|
132
|
-
selftune dashboard --
|
|
133
|
-
selftune dashboard --
|
|
134
|
-
selftune dashboard --
|
|
135
|
-
selftune dashboard --serve --port 8080 Start on custom port`);
|
|
17
|
+
selftune dashboard Start dashboard server (port 3141)
|
|
18
|
+
selftune dashboard --port 8080 Start on custom port
|
|
19
|
+
selftune dashboard --serve Deprecated alias for default behavior
|
|
20
|
+
selftune dashboard --no-open Start server without opening browser`);
|
|
136
21
|
process.exit(0);
|
|
137
22
|
}
|
|
138
23
|
|
|
139
|
-
if (args.includes("--
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
console.error(
|
|
146
|
-
`Invalid port "${args[portIdx + 1]}": must be an integer between 1 and 65535.`,
|
|
147
|
-
);
|
|
148
|
-
process.exit(1);
|
|
149
|
-
}
|
|
150
|
-
port = parsed;
|
|
151
|
-
}
|
|
152
|
-
const { startDashboardServer } = await import("./dashboard-server.js");
|
|
153
|
-
await startDashboardServer({ port, openBrowser: true });
|
|
154
|
-
return;
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
if (args.includes("--export")) {
|
|
158
|
-
process.stdout.write(buildEmbeddedHTML());
|
|
159
|
-
return;
|
|
24
|
+
if (args.includes("--export") || args.includes("--out")) {
|
|
25
|
+
console.error("Legacy dashboard export was removed.");
|
|
26
|
+
console.error(
|
|
27
|
+
"Use `selftune dashboard` to run the SPA locally, then share a route or screenshot instead.",
|
|
28
|
+
);
|
|
29
|
+
process.exit(1);
|
|
160
30
|
}
|
|
161
31
|
|
|
162
|
-
const
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
32
|
+
const portIdx = args.indexOf("--port");
|
|
33
|
+
let port: number | undefined;
|
|
34
|
+
if (portIdx !== -1) {
|
|
35
|
+
const parsed = Number.parseInt(args[portIdx + 1], 10);
|
|
36
|
+
if (!Number.isInteger(parsed) || parsed < 1 || parsed > 65535) {
|
|
37
|
+
console.error(`Invalid port "${args[portIdx + 1]}": must be an integer between 1 and 65535.`);
|
|
167
38
|
process.exit(1);
|
|
168
39
|
}
|
|
169
|
-
|
|
170
|
-
writeFileSync(outPath, html, "utf-8");
|
|
171
|
-
console.log(`Dashboard written to ${outPath}`);
|
|
172
|
-
return;
|
|
173
|
-
}
|
|
174
|
-
|
|
175
|
-
// Default: write to temp file and open in browser
|
|
176
|
-
const tmpDir = join(homedir(), ".selftune");
|
|
177
|
-
if (!existsSync(tmpDir)) {
|
|
178
|
-
mkdirSync(tmpDir, { recursive: true });
|
|
40
|
+
port = parsed;
|
|
179
41
|
}
|
|
180
|
-
const tmpPath = join(tmpDir, "dashboard.html");
|
|
181
|
-
const html = buildEmbeddedHTML();
|
|
182
|
-
writeFileSync(tmpPath, html, "utf-8");
|
|
183
42
|
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
43
|
+
if (args.includes("--serve")) {
|
|
44
|
+
console.warn("`selftune dashboard --serve` is deprecated; use `selftune dashboard` instead.");
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
const openBrowser = !args.includes("--no-open");
|
|
48
|
+
const { startDashboardServer } = await import("./dashboard-server.js");
|
|
49
|
+
const { stop } = await startDashboardServer({ port, openBrowser });
|
|
50
|
+
await new Promise<void>((resolve) => {
|
|
51
|
+
let closed = false;
|
|
52
|
+
const keepAlive = setInterval(() => {}, 1 << 30);
|
|
53
|
+
const shutdown = () => {
|
|
54
|
+
if (closed) return;
|
|
55
|
+
closed = true;
|
|
56
|
+
clearInterval(keepAlive);
|
|
57
|
+
stop();
|
|
58
|
+
resolve();
|
|
59
|
+
};
|
|
60
|
+
process.on("SIGINT", shutdown);
|
|
61
|
+
process.on("SIGTERM", shutdown);
|
|
62
|
+
});
|
|
198
63
|
}
|
|
@@ -151,10 +151,10 @@ export async function cliMain(): Promise<void> {
|
|
|
151
151
|
});
|
|
152
152
|
|
|
153
153
|
if (values.help) {
|
|
154
|
-
console.log(`selftune baseline — Measure skill value vs. no-skill baseline
|
|
154
|
+
console.log(`selftune grade baseline — Measure skill value vs. no-skill baseline
|
|
155
155
|
|
|
156
156
|
Usage:
|
|
157
|
-
selftune baseline --skill <name> --skill-path <path> [options]
|
|
157
|
+
selftune grade baseline --skill <name> --skill-path <path> [options]
|
|
158
158
|
|
|
159
159
|
Options:
|
|
160
160
|
--skill Skill name (required)
|
|
@@ -187,10 +187,11 @@ Options:
|
|
|
187
187
|
evalSet = JSON.parse(raw) as EvalEntry[];
|
|
188
188
|
} else {
|
|
189
189
|
// Build from logs
|
|
190
|
-
const { QUERY_LOG
|
|
190
|
+
const { QUERY_LOG } = await import("../constants.js");
|
|
191
191
|
const { readJsonl } = await import("../utils/jsonl.js");
|
|
192
|
+
const { readEffectiveSkillUsageRecords } = await import("../utils/skill-log.js");
|
|
192
193
|
const { buildEvalSet } = await import("./hooks-to-evals.js");
|
|
193
|
-
const skillRecords =
|
|
194
|
+
const skillRecords = readEffectiveSkillUsageRecords();
|
|
194
195
|
const queryRecords = readJsonl(QUERY_LOG);
|
|
195
196
|
evalSet = buildEvalSet(skillRecords, queryRecords, values.skill);
|
|
196
197
|
}
|
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* composability-v2.ts
|
|
3
|
+
*
|
|
4
|
+
* Extended composability analysis with synergy detection and sequence extraction.
|
|
5
|
+
* Builds on v1 patterns but adds:
|
|
6
|
+
* - Synergy scores (positive = better together, negative = conflict)
|
|
7
|
+
* - Ordered skill sequence detection from timestamps
|
|
8
|
+
* - Workflow candidate flagging
|
|
9
|
+
*
|
|
10
|
+
* Pure function -- no I/O. CLI wrapper handles reading JSONL.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import type {
|
|
14
|
+
ComposabilityReportV2,
|
|
15
|
+
CoOccurrencePairV2,
|
|
16
|
+
SessionTelemetryRecord,
|
|
17
|
+
SkillSequence,
|
|
18
|
+
SkillUsageRecord,
|
|
19
|
+
} from "../types.js";
|
|
20
|
+
import { clamp } from "../utils/math.js";
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* Extended composability analysis with synergy detection and sequence extraction.
|
|
24
|
+
*
|
|
25
|
+
* @param skillName - The skill to analyze
|
|
26
|
+
* @param telemetry - Session telemetry records
|
|
27
|
+
* @param usage - Skill usage records (for timestamp-based ordering)
|
|
28
|
+
* @param options - Analysis options
|
|
29
|
+
* @returns ComposabilityReportV2 with synergy pairs, sequences, and workflow candidates
|
|
30
|
+
*/
|
|
31
|
+
export function analyzeComposabilityV2(
|
|
32
|
+
skillName: string,
|
|
33
|
+
telemetry: SessionTelemetryRecord[],
|
|
34
|
+
usage: SkillUsageRecord[],
|
|
35
|
+
options?: {
|
|
36
|
+
window?: number;
|
|
37
|
+
minOccurrences?: number;
|
|
38
|
+
},
|
|
39
|
+
): ComposabilityReportV2 {
|
|
40
|
+
const minOccurrences = options?.minOccurrences ?? 3;
|
|
41
|
+
|
|
42
|
+
// Apply window: sort by timestamp descending, take last N
|
|
43
|
+
let sessions = telemetry.filter((r) => r && Array.isArray(r.skills_triggered));
|
|
44
|
+
|
|
45
|
+
if (options?.window && options.window > 0) {
|
|
46
|
+
sessions = sessions
|
|
47
|
+
.sort((a, b) => (b.timestamp ?? "").localeCompare(a.timestamp ?? ""))
|
|
48
|
+
.slice(0, options.window);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
// Build a set of session IDs in scope (after windowing)
|
|
52
|
+
const sessionIdSet = new Set(sessions.map((s) => s.session_id));
|
|
53
|
+
|
|
54
|
+
// Sessions where the target skill was triggered
|
|
55
|
+
const skillSessions = sessions.filter((r) => r.skills_triggered.includes(skillName));
|
|
56
|
+
|
|
57
|
+
// Sessions where the target skill was triggered ALONE (no other skills)
|
|
58
|
+
const aloneSessions = skillSessions.filter((r) => r.skills_triggered.length === 1);
|
|
59
|
+
|
|
60
|
+
// Average errors when skill is used alone
|
|
61
|
+
const errorsAlone =
|
|
62
|
+
aloneSessions.length > 0
|
|
63
|
+
? aloneSessions.reduce((sum, r) => sum + (r.errors_encountered ?? 0), 0) /
|
|
64
|
+
aloneSessions.length
|
|
65
|
+
: 0;
|
|
66
|
+
|
|
67
|
+
// Find all co-occurring skills
|
|
68
|
+
const coSkills = new Set<string>();
|
|
69
|
+
for (const r of skillSessions) {
|
|
70
|
+
for (const s of r.skills_triggered) {
|
|
71
|
+
if (s !== skillName) coSkills.add(s);
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
// -----------------------------------------------------------------------
|
|
76
|
+
// Synergy computation for each co-occurring skill
|
|
77
|
+
// -----------------------------------------------------------------------
|
|
78
|
+
|
|
79
|
+
const pairs: CoOccurrencePairV2[] = [];
|
|
80
|
+
for (const coSkill of coSkills) {
|
|
81
|
+
// Sessions where BOTH skills are triggered together
|
|
82
|
+
const togetherSessions = skillSessions.filter((r) => r.skills_triggered.includes(coSkill));
|
|
83
|
+
const coOccurrenceCount = togetherSessions.length;
|
|
84
|
+
|
|
85
|
+
// Average errors when both skills are used together
|
|
86
|
+
const avgErrorsTogether =
|
|
87
|
+
togetherSessions.length > 0
|
|
88
|
+
? togetherSessions.reduce((sum, r) => sum + (r.errors_encountered ?? 0), 0) /
|
|
89
|
+
togetherSessions.length
|
|
90
|
+
: 0;
|
|
91
|
+
|
|
92
|
+
// Baseline: consider BOTH skills' solo error rates, take the max
|
|
93
|
+
const coSkillAloneSessions = sessions.filter(
|
|
94
|
+
(r) => r.skills_triggered.length === 1 && r.skills_triggered.includes(coSkill),
|
|
95
|
+
);
|
|
96
|
+
const errorsCoSkillAlone =
|
|
97
|
+
coSkillAloneSessions.length > 0
|
|
98
|
+
? coSkillAloneSessions.reduce((sum, r) => sum + (r.errors_encountered ?? 0), 0) /
|
|
99
|
+
coSkillAloneSessions.length
|
|
100
|
+
: errorsAlone;
|
|
101
|
+
const avgErrorsAlone = Math.max(errorsAlone, errorsCoSkillAlone);
|
|
102
|
+
|
|
103
|
+
// synergy_score = clamp((avg_errors_alone - avg_errors_together) / (avg_errors_alone + 1), -1, 1)
|
|
104
|
+
const synergyScore = clamp((avgErrorsAlone - avgErrorsTogether) / (avgErrorsAlone + 1), -1, 1);
|
|
105
|
+
|
|
106
|
+
const conflictDetected = synergyScore < -0.3;
|
|
107
|
+
const workflowCandidate = synergyScore > 0.3 && coOccurrenceCount >= minOccurrences;
|
|
108
|
+
|
|
109
|
+
const pair: CoOccurrencePairV2 = {
|
|
110
|
+
skill_a: skillName,
|
|
111
|
+
skill_b: coSkill,
|
|
112
|
+
co_occurrence_count: coOccurrenceCount,
|
|
113
|
+
conflict_detected: conflictDetected,
|
|
114
|
+
synergy_score: synergyScore,
|
|
115
|
+
avg_errors_together: avgErrorsTogether,
|
|
116
|
+
avg_errors_alone: avgErrorsAlone,
|
|
117
|
+
workflow_candidate: workflowCandidate,
|
|
118
|
+
};
|
|
119
|
+
|
|
120
|
+
if (conflictDetected) {
|
|
121
|
+
pair.conflict_reason = `synergy_score=${synergyScore.toFixed(3)} (avg errors together=${avgErrorsTogether.toFixed(1)} vs alone=${avgErrorsAlone.toFixed(1)})`;
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
pairs.push(pair);
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
// Sort by co-occurrence count descending
|
|
128
|
+
pairs.sort((a, b) => b.co_occurrence_count - a.co_occurrence_count);
|
|
129
|
+
|
|
130
|
+
// -----------------------------------------------------------------------
|
|
131
|
+
// Sequence extraction from usage records
|
|
132
|
+
// -----------------------------------------------------------------------
|
|
133
|
+
|
|
134
|
+
// Filter usage records for sessions in scope that contain the target skill
|
|
135
|
+
const usageInScope = usage.filter((u) => sessionIdSet.has(u.session_id));
|
|
136
|
+
|
|
137
|
+
// Group by session_id
|
|
138
|
+
const usageBySession = new Map<string, SkillUsageRecord[]>();
|
|
139
|
+
for (const u of usageInScope) {
|
|
140
|
+
const group = usageBySession.get(u.session_id);
|
|
141
|
+
if (group) {
|
|
142
|
+
group.push(u);
|
|
143
|
+
} else {
|
|
144
|
+
usageBySession.set(u.session_id, [u]);
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
// Build ordered sequences per session (only sessions containing target skill)
|
|
149
|
+
const sessionSequences: Array<{ skills: string[]; sessionId: string; firstQuery: string }> = [];
|
|
150
|
+
|
|
151
|
+
for (const [sessionId, records] of usageBySession) {
|
|
152
|
+
// Only sessions containing the target skill
|
|
153
|
+
if (!records.some((r) => r.skill_name === skillName)) continue;
|
|
154
|
+
|
|
155
|
+
// Sort by timestamp ascending
|
|
156
|
+
const sorted = [...records].sort((a, b) =>
|
|
157
|
+
(a.timestamp ?? "").localeCompare(b.timestamp ?? ""),
|
|
158
|
+
);
|
|
159
|
+
|
|
160
|
+
// Extract skill names, deduplicate consecutive same-skill entries
|
|
161
|
+
const skills: string[] = [];
|
|
162
|
+
for (const r of sorted) {
|
|
163
|
+
if (skills.length === 0 || skills[skills.length - 1] !== r.skill_name) {
|
|
164
|
+
skills.push(r.skill_name);
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
// Only record sequences with 2+ skills
|
|
169
|
+
if (skills.length >= 2) {
|
|
170
|
+
sessionSequences.push({
|
|
171
|
+
skills,
|
|
172
|
+
sessionId,
|
|
173
|
+
firstQuery: sorted[0]?.query ?? "",
|
|
174
|
+
});
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
// Count frequency of each unique sequence (by JSON key)
|
|
179
|
+
const sequenceCounts = new Map<
|
|
180
|
+
string,
|
|
181
|
+
{ count: number; queryCounts: Map<string, number>; skills: string[] }
|
|
182
|
+
>();
|
|
183
|
+
for (const seq of sessionSequences) {
|
|
184
|
+
const key = JSON.stringify(seq.skills);
|
|
185
|
+
const existing = sequenceCounts.get(key);
|
|
186
|
+
if (existing) {
|
|
187
|
+
existing.count++;
|
|
188
|
+
existing.queryCounts.set(seq.firstQuery, (existing.queryCounts.get(seq.firstQuery) ?? 0) + 1);
|
|
189
|
+
} else {
|
|
190
|
+
sequenceCounts.set(key, {
|
|
191
|
+
count: 1,
|
|
192
|
+
queryCounts: new Map([[seq.firstQuery, 1]]),
|
|
193
|
+
skills: seq.skills,
|
|
194
|
+
});
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
// Also count all orderings of each skill set (for consistency computation)
|
|
199
|
+
// Key: sorted skill set -> total count of all orderings
|
|
200
|
+
const skillSetCounts = new Map<string, number>();
|
|
201
|
+
for (const seq of sessionSequences) {
|
|
202
|
+
const setKey = JSON.stringify([...seq.skills].sort());
|
|
203
|
+
skillSetCounts.set(setKey, (skillSetCounts.get(setKey) ?? 0) + 1);
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
// Build telemetry lookup by session_id for synergy scoring
|
|
207
|
+
const telemetryBySession = new Map<string, SessionTelemetryRecord>();
|
|
208
|
+
for (const s of sessions) {
|
|
209
|
+
telemetryBySession.set(s.session_id, s);
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
// Build sequences, filtered by minOccurrences
|
|
213
|
+
const sequences: SkillSequence[] = [];
|
|
214
|
+
for (const [key, data] of sequenceCounts) {
|
|
215
|
+
if (data.count < minOccurrences) continue;
|
|
216
|
+
|
|
217
|
+
// Compute synergy_score for this sequence's sessions
|
|
218
|
+
const matchingSessions = sessionSequences
|
|
219
|
+
.filter((s) => JSON.stringify(s.skills) === key)
|
|
220
|
+
.map((s) => telemetryBySession.get(s.sessionId))
|
|
221
|
+
.filter((s): s is SessionTelemetryRecord => s !== undefined);
|
|
222
|
+
|
|
223
|
+
const seqErrorsTogether =
|
|
224
|
+
matchingSessions.length > 0
|
|
225
|
+
? matchingSessions.reduce((sum, r) => sum + (r.errors_encountered ?? 0), 0) /
|
|
226
|
+
matchingSessions.length
|
|
227
|
+
: 0;
|
|
228
|
+
|
|
229
|
+
const seqSynergyScore = clamp((errorsAlone - seqErrorsTogether) / (errorsAlone + 1), -1, 1);
|
|
230
|
+
|
|
231
|
+
// Consistency: count of this exact order / count of all orderings of same skill set
|
|
232
|
+
const setKey = JSON.stringify([...data.skills].sort());
|
|
233
|
+
const totalOrderings = skillSetCounts.get(setKey) ?? data.count;
|
|
234
|
+
const sequenceConsistency = totalOrderings > 0 ? data.count / totalOrderings : 1;
|
|
235
|
+
|
|
236
|
+
let representativeQuery = "";
|
|
237
|
+
let highestFrequency = -1;
|
|
238
|
+
for (const [query, frequency] of data.queryCounts) {
|
|
239
|
+
if (frequency > highestFrequency) {
|
|
240
|
+
representativeQuery = query;
|
|
241
|
+
highestFrequency = frequency;
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
sequences.push({
|
|
246
|
+
skills: data.skills,
|
|
247
|
+
occurrence_count: data.count,
|
|
248
|
+
synergy_score: seqSynergyScore,
|
|
249
|
+
representative_query: representativeQuery,
|
|
250
|
+
sequence_consistency: sequenceConsistency,
|
|
251
|
+
});
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
// Sort sequences by occurrence_count descending
|
|
255
|
+
sequences.sort((a, b) => b.occurrence_count - a.occurrence_count);
|
|
256
|
+
|
|
257
|
+
// -----------------------------------------------------------------------
|
|
258
|
+
// Assemble report
|
|
259
|
+
// -----------------------------------------------------------------------
|
|
260
|
+
|
|
261
|
+
const workflowCandidates = pairs.filter((p) => p.workflow_candidate);
|
|
262
|
+
const synergyCount = pairs.filter((p) => p.synergy_score > 0.3).length;
|
|
263
|
+
|
|
264
|
+
return {
|
|
265
|
+
pairs,
|
|
266
|
+
sequences,
|
|
267
|
+
workflow_candidates: workflowCandidates,
|
|
268
|
+
synergy_count: synergyCount,
|
|
269
|
+
total_sessions_analyzed: skillSessions.length,
|
|
270
|
+
conflict_count: pairs.filter((p) => p.conflict_detected).length,
|
|
271
|
+
generated_at: new Date().toISOString(),
|
|
272
|
+
};
|
|
273
|
+
}
|